talk2me/venv/lib/python3.11/site-packages/pyttsx3/drivers/_espeak.py
2025-04-04 13:23:15 -06:00

500 lines
18 KiB
Python

from __future__ import print_function

import time
from ctypes import (CFUNCTYPE, POINTER, Structure, Union, c_char_p, c_int,
                    c_long, c_short, c_size_t, c_ubyte, c_uint, c_ulong,
                    c_void_p, c_wchar, c_wchar_p, cdll)
def cfunc(name, dll, result, *args):
    """Return a callable bound to the exported function `name` of `dll`.

    `result` is the ctypes return type.  Every entry of `args` describes one
    parameter as a tuple ``(param_name, ctype, direction_flag[, default])``;
    the entries are converted into the argument types and the ``paramflags``
    tuple that a ctypes function prototype expects.
    """
    arg_types = [spec[1] for spec in args]
    # ctypes wants each flag entry as (direction_flag, name[, default]).
    param_flags = tuple((spec[2], spec[0]) + spec[3:] for spec in args)
    prototype = CFUNCTYPE(result, *arg_types)
    return prototype((name, dll), param_flags)
# Handle of the loaded eSpeak shared library; assigned by load_library().
dll = None


def load_library():
    """Load the first eSpeak / eSpeak NG shared library that can be opened.

    The candidate locations are tried in order; the first one that loads is
    stored in the module-level ``dll``.

    Returns:
        True if a library was loaded, False if every candidate failed.
    """
    global dll
    paths = [
        # macOS paths
        '/usr/local/lib/libespeak-ng.1.dylib',
        '/usr/local/lib/libespeak.dylib',
        # Linux paths
        'libespeak-ng.so.1',
        '/usr/local/lib/libespeak-ng.so.1',
        'libespeak.so.1',
        # Windows paths
        r'C:\Program Files\eSpeak NG\libespeak-ng.dll',
        r'C:\Program Files (x86)\eSpeak NG\libespeak-ng.dll',
    ]
    for path in paths:
        try:
            dll = cdll.LoadLibrary(path)
        except OSError:
            # ctypes reports a missing or unloadable library as OSError.
            # Anything else is a real bug and is allowed to propagate.
            continue
        return True
    return False
# Fail at import time when no eSpeak library could be opened: every binding
# created further down in this module is built from the `dll` handle.
if not load_library():
    raise RuntimeError("This means you probably do not have eSpeak or eSpeak-ng installed!")
# constants and such from speak_lib.h
# Event type codes stored in EVENT.type; numbered consecutively from 0.
(
    EVENT_LIST_TERMINATED,  # 0
    EVENT_WORD,             # 1
    EVENT_SENTENCE,         # 2
    EVENT_MARK,             # 3
    EVENT_PLAY,             # 4
    EVENT_END,              # 5
    EVENT_MSG_TERMINATED,   # 6
) = range(7)
class numberORname(Union):
    """Union overlaying an integer and a C string pointer.

    Used as the type of the ``id`` member of EVENT.
    """
    _fields_ = [('number', c_int), ('name', c_char_p)]
class EVENT(Structure):
    """ctypes layout of one entry of the event array given to the synth callback.

    The field order defines the binary layout and must not be changed.
    """
    _fields_ = [
        ('type', c_int),                # one of the EVENT_* codes
        ('unique_identifier', c_uint),
        ('text_position', c_int),
        ('length', c_int),
        ('audio_position', c_int),
        ('sample', c_int),
        ('user_data', c_void_p),
        ('id', numberORname),           # integer or C string, see numberORname
    ]
# Values for the `output` argument of Initialize().
(
    AUDIO_OUTPUT_PLAYBACK,        # 0
    AUDIO_OUTPUT_RETRIEVAL,       # 1
    AUDIO_OUTPUT_SYNCHRONOUS,     # 2
    AUDIO_OUTPUT_SYNCH_PLAYBACK,  # 3
) = range(4)

# Status codes returned by the eSpeak functions bound in this module.
EE_OK, EE_INTERNAL_ERROR, EE_BUFFER_FULL, EE_NOT_FOUND = 0, -1, 1, 2
# Binding for espeak_Initialize (integer result).  All four parameters are
# inputs with defaults, so Initialize() may be called without arguments.
# NOTE: the keyword names are `bufflength` and `option`; the usage text
# below spells them `buflength` and `options`.
Initialize = cfunc('espeak_Initialize', dll, c_int,
('output', c_int, 1, AUDIO_OUTPUT_PLAYBACK),
('bufflength', c_int, 1, 100),
('path', c_char_p, 1, None),
('option', c_int, 1, 0))
# Usage text attached as the binding's __doc__.
Initialize.__doc__ = """Must be called before any synthesis functions are called.
output: the audio data can either be played by eSpeak or passed back by the SynthCallback function.
buflength: The length in mS of sound buffers passed to the SynthCallback function.
path: The directory which contains the espeak-data directory, or NULL for the default location.
options: bit 0: 1=allow espeakEVENT_PHONEME events.
Returns: sample rate in Hz, or -1 (EE_INTERNAL_ERROR)."""
# C callback type: int (*)(short *wav, int numsamples, EVENT *events).
t_espeak_callback = CFUNCTYPE(c_int, POINTER(c_short), c_int, POINTER(EVENT))
# Raw binding for espeak_SetSynthCallback (no return value); it expects an
# already wrapped t_espeak_callback object.  SetSynthCallback() does the wrapping.
cSetSynthCallback = cfunc('espeak_SetSynthCallback', dll, None,
('SynthCallback', t_espeak_callback, 1))
# Module-level slot for the wrapper most recently installed by SetSynthCallback().
SynthCallback = None
def SetSynthCallback(cb):
    # Wrap the Python callable `cb` as a C callback and register it with
    # eSpeak.  The wrapper is also stored in the module-level SynthCallback,
    # so a reference to it survives after this function returns.
    global SynthCallback
    wrapped = t_espeak_callback(cb)
    SynthCallback = wrapped
    cSetSynthCallback(wrapped)


# Usage text for the wrapper; assigned here instead of being written as a
# docstring inside the function.
SetSynthCallback.__doc__ = """Must be called before any synthesis functions are called.
This specifies a function in the calling program which is called when a buffer of
speech sound data has been produced.
The callback function is of the form:
int SynthCallback(short *wav, int numsamples, espeak_EVENT *events);
wav: is the speech sound data which has been produced.
NULL indicates that the synthesis has been completed.
numsamples: is the number of entries in wav. This number may vary, may be less than
the value implied by the buflength parameter given in espeak_Initialize, and may
sometimes be zero (which does NOT indicate end of synthesis).
events: an array of espeak_EVENT items which indicate word and sentence events, and
also the occurance if <mark> and <audio> elements within the text.
Callback returns: 0=continue synthesis, 1=abort synthesis."""
# C callback type: int (*)(int type, const char *uri, const char *base).
t_UriCallback = CFUNCTYPE(c_int, c_int, c_char_p, c_char_p)
# Raw binding for espeak_SetUriCallback (no return value); it expects an
# already wrapped t_UriCallback object.  SetUriCallback() is the Python-facing wrapper.
cSetUriCallback = cfunc('espeak_SetUriCallback', dll, None,
('UriCallback', t_UriCallback, 1))
# Module-level slot for the wrapper most recently installed by SetUriCallback().
UriCallback = None
def SetUriCallback(cb):
    # Wrap the Python callable `cb` as a C callback and register it with
    # eSpeak.  The wrapper must be built from `cb`: building it from the
    # module-level UriCallback (initially None) would discard the caller's
    # function and register an unusable callback.
    # The wrapper is stored in the module-level UriCallback so a reference
    # to it survives after this function returns.
    global UriCallback
    UriCallback = t_UriCallback(cb)
    cSetUriCallback(UriCallback)


# Usage text for the wrapper; assigned here instead of being written as a
# docstring inside the function.
SetUriCallback.__doc__ = """This function must be called before synthesis functions are used, in order to deal with
<audio> tags. It specifies a callback function which is called when an <audio> element is
encountered and allows the calling program to indicate whether the sound file which
is specified in the <audio> element is available and is to be played.
The callback function is of the form:
int UriCallback(int type, const char *uri, const char *base);
type: type of callback event. Currently only 1= <audio> element
uri: the "src" attribute from the <audio> element
base: the "xml:base" attribute (if any) from the <speak> element
Return: 1=don't play the sound, but speak the text alternative.
0=place a PLAY event in the event list at the point where the <audio> element
occurs. The calling program can then play the sound at that point."""
# a few manifest constants
# Character-encoding selectors for the `flags` argument of Synth().
CHARS_AUTO, CHARS_UTF8, CHARS_8BIT, CHARS_WCHAR = range(4)

# Option bits that may be OR'd into `flags`.
SSML = 1 << 4            # 0x10
PHONEMES = 1 << 8        # 0x100
ENDPAUSE = 1 << 12       # 0x1000
KEEP_NAMEDATA = 1 << 13  # 0x2000

# Units for the `position` argument of Synth().
POS_CHARACTER, POS_WORD, POS_SENTENCE = range(1, 4)
def Synth(text, position=0, position_type=POS_CHARACTER, end_position=0, flags=0, user_data=None):
    # Python-facing wrapper around cSynth; the usage text is attached further
    # down through Synth.__doc__.
    #
    # text: bytes, or a text string which is encoded as UTF-8 here because the
    #       underlying parameter is a C `char *` and ctypes only accepts bytes
    #       for it on Python 3.
    # Returns the integer status code from cSynth (EE_OK, EE_BUFFER_FULL, ...).
    if isinstance(text, type(u'')):
        text = text.encode('utf-8')
    # `size` is the byte length including the terminating NUL.  eSpeak copies
    # that many bytes from the buffer, so a larger value would make it read
    # past the end of the Python bytes object.
    size = len(text) + 1
    # unique_identifier is always passed as None.
    return cSynth(text, size, position, position_type, end_position, flags, None, user_data)
# Raw binding for espeak_Synth; Synth() is the Python-facing wrapper.
# `size` is declared as c_size_t to match the C `size_t` parameter; c_long
# has a different width on platforms where long is 32 bits and pointers are
# 64 bits.
cSynth = cfunc('espeak_Synth', dll, c_int,
               ('text', c_char_p, 1),
               ('size', c_size_t, 1),
               ('position', c_uint, 1, 0),
               ('position_type', c_int, 1, POS_CHARACTER),
               ('end_position', c_uint, 1, 0),
               ('flags', c_uint, 1, CHARS_AUTO),
               ('unique_identifier', POINTER(c_uint), 1, None),
               ('user_data', c_void_p, 1, None))
# Usage text for the Synth() wrapper.
Synth.__doc__ = """Synthesize speech for the specified text. The speech sound data is passed to the calling
program in buffers by means of the callback function specified by espeak_SetSynthCallback(). The command is asynchronous: it is internally buffered and returns as soon as possible. If espeak_Initialize was previously called with AUDIO_OUTPUT_PLAYBACK as argument, the sound data are played by eSpeak.
text: The text to be spoken, terminated by a zero character. It may be either 8-bit characters,
wide characters (wchar_t), or UTF8 encoding. Which of these is determined by the "flags"
parameter.
size: Equal to (or greater than) the size of the text data, in bytes. This is used in order
to allocate internal storage space for the text. This value is not used for
AUDIO_OUTPUT_SYNCHRONOUS mode.
position: The position in the text where speaking starts. Zero indicates speak from the
start of the text.
position_type: Determines whether "position" is a number of characters, words, or sentences.
Values:
end_position: If set, this gives a character position at which speaking will stop. A value
of zero indicates no end position.
flags: These may be OR'd together:
Type of character codes, one of:
espeak.CHARS_UTF8 UTF8 encoding
espeak.CHARS_8BIT The 8 bit ISO-8859 character set for the particular language.
espeak.CHARS_AUTO 8 bit or UTF8 (this is the default)
espeak.CHARS_WCHAR Wide characters (wchar_t)
espeak.SSML Elements within < > are treated as SSML elements, or if not recognised are ignored.
espeak.PHONEMES Text within [[ ]] is treated as phonemes codes (in espeak's Hirschenbaum encoding).
espeak.ENDPAUSE If set then a sentence pause is added at the end of the text. If not set then
this pause is suppressed.
unique_identifier: message identifier; helpful for identifying later
data supplied to the callback.
user_data: pointer which will be passed to the callback function.
Return: EE_OK: operation achieved
EE_BUFFER_FULL: the command can not be buffered;
you may try after a while to call the function again.
EE_INTERNAL_ERROR."""
def Synth_Mark(text, index_mark, end_position=0, flags=CHARS_AUTO):
    # Python-facing wrapper around cSynth_Mark; the usage text is attached
    # further down through Synth_Mark.__doc__.
    #
    # text, index_mark: bytes, or text strings which are encoded as UTF-8 here
    #       because the underlying parameters are C `char *` and ctypes only
    #       accepts bytes for them on Python 3.
    # Returns the integer status code from cSynth_Mark, which the usage text
    # documents as the result (EE_OK, EE_BUFFER_FULL, EE_INTERNAL_ERROR).
    if isinstance(text, type(u'')):
        text = text.encode('utf-8')
    if isinstance(index_mark, type(u'')):
        index_mark = index_mark.encode('utf-8')
    # size = byte length of the text including its terminating NUL.
    return cSynth_Mark(text, len(text) + 1, index_mark, end_position, flags)
# Raw binding for espeak_Synth_Mark; Synth_Mark() is the Python-facing wrapper.
# `size` is declared as c_size_t to match the C `size_t` parameter; c_ulong
# has a different width on platforms where long is 32 bits and pointers are
# 64 bits.
cSynth_Mark = cfunc('espeak_Synth_Mark', dll, c_int,
                    ('text', c_char_p, 1),
                    ('size', c_size_t, 1),
                    ('index_mark', c_char_p, 1),
                    ('end_position', c_uint, 1, 0),
                    ('flags', c_uint, 1, CHARS_AUTO),
                    ('unique_identifier', POINTER(c_uint), 1, None),
                    ('user_data', c_void_p, 1, None))
# Usage text for the Synth_Mark() wrapper.
Synth_Mark.__doc__ = """Synthesize speech for the specified text. Similar to espeak_Synth() but the start position is
specified by the name of a <mark> element in the text.
index_mark: The "name" attribute of a <mark> element within the text which specified the
point at which synthesis starts. UTF8 string.
For the other parameters, see espeak_Synth()
Return: EE_OK: operation achieved
EE_BUFFER_FULL: the command can not be buffered;
you may try after a while to call the function again.
EE_INTERNAL_ERROR."""
# Binding for espeak_Key: one required input C string (`key_name`, passed as
# bytes), integer status result.
Key = cfunc('espeak_Key', dll, c_int,
('key_name', c_char_p, 1))
# Usage text attached as the binding's __doc__.
Key.__doc__ = """Speak the name of a keyboard key.
Currently this just speaks the "key_name" as given
Return: EE_OK: operation achieved
EE_BUFFER_FULL: the command can not be buffered;
you may try after a while to call the function again.
EE_INTERNAL_ERROR."""
# Binding for espeak_Char: one required input wide character (`character`,
# a single-character string), integer status result.
Char = cfunc('espeak_Char', dll, c_int,
('character', c_wchar, 1))
# Usage text attached as the binding's __doc__.
Char.__doc__ = """Speak the name of the given character
Return: EE_OK: operation achieved
EE_BUFFER_FULL: the command can not be buffered;
you may try after a while to call the function again.
EE_INTERNAL_ERROR."""
# Speech Parameters
# Selector codes for the `parameter` argument of SetParameter()/GetParameter().
(
    SILENCE,      # 0, internal use
    RATE,         # 1
    VOLUME,       # 2
    PITCH,        # 3
    RANGE,        # 4
    PUNCTUATION,  # 5
    CAPITALS,     # 6
    EMPHASIS,     # 7, internal use
    LINELENGTH,   # 8, internal use
) = range(9)

# Values for the PUNCTUATION parameter.
PUNCT_NONE, PUNCT_ALL, PUNCT_SOME = range(3)
# Binding for espeak_SetParameter (integer status result).  `parameter` is
# one of the speech parameter codes (RATE, VOLUME, ...), `value` is required,
# and `relative` defaults to 0 (absolute value).
SetParameter = cfunc('espeak_SetParameter', dll, c_int,
('parameter', c_int, 1),
('value', c_int, 1),
('relative', c_int, 1, 0))
# Usage text attached as the binding's __doc__.
SetParameter.__doc__ = """Sets the value of the specified parameter.
relative=0 Sets the absolute value of the parameter.
relative=1 Sets a relative value of the parameter.
parameter:
espeak.RATE: speaking speed in word per minute.
espeak.VOLUME: volume in range 0-100 0=silence
espeak.PITCH: base pitch, range 0-100. 50=normal
espeak.RANGE: pitch range, range 0-100. 0-monotone, 50=normal
espeak.PUNCTUATION: which punctuation characters to announce:
value in espeak_PUNCT_TYPE (none, all, some),
see espeak_GetParameter() to specify which characters are announced.
espeak.CAPITALS: announce capital letters by:
0=none,
1=sound icon,
2=spelling,
3 or higher, by raising pitch. This values gives the amount in Hz by which the pitch
of a word raised to indicate it has a capital letter.
Return: EE_OK: operation achieved
EE_BUFFER_FULL: the command can not be buffered;
you may try after a while to call the function again.
EE_INTERNAL_ERROR."""
# Binding for espeak_GetParameter (integer result).  The C function takes
# two arguments, (parameter, current), as the usage text below describes.
# `current` is therefore declared explicitly, so the callee never reads an
# argument that was not passed.  It defaults to 1 (the current value), so
# existing one-argument calls keep working.
GetParameter = cfunc('espeak_GetParameter', dll, c_int,
                     ('parameter', c_int, 1),
                     ('current', c_int, 1, 1))
# Usage text attached as the binding's __doc__.
GetParameter.__doc__ = """current=0 Returns the default value of the specified parameter.
current=1 Returns the current value of the specified parameter, as set by SetParameter()"""
# Binding for espeak_SetPunctuationList (integer status result).
# `punctlist` is a zero-terminated list of character codes (see the usage
# text below), i.e. a wide-character string, so it is declared as c_wchar_p.
# Declaring it as a single c_wchar passed a character value where the C
# function expects a pointer.
SetPunctuationList = cfunc('espeak_SetPunctuationList', dll, c_int,
                           ('punctlist', c_wchar_p, 1))
# Usage text attached as the binding's __doc__.
SetPunctuationList.__doc__ = """Specified a list of punctuation characters whose names are
to be spoken when the value of the Punctuation parameter is set to "some".
punctlist: A list of character codes, terminated by a zero character.
Return: EE_OK: operation achieved
EE_BUFFER_FULL: the command can not be buffered;
you may try after a while to call the function again.
EE_INTERNAL_ERROR."""
# Binding for espeak_SetPhonemeTrace: no return value; `value` is an integer
# and `stream` an opaque pointer (both required inputs).
SetPhonemeTrace = cfunc('espeak_SetPhonemeTrace', dll, None,
('value', c_int, 1),
('stream', c_void_p, 1))
# Usage text attached as the binding's __doc__.
SetPhonemeTrace.__doc__ = """Controls the output of phoneme symbols for the text
value=0 No phoneme output (default)
value=1 Output the translated phoneme symbols for the text
value=2 as (1), but also output a trace of how the translation was done (matching rules and list entries)
stream output stream for the phoneme symbols (and trace). If stream=NULL then it uses stdout."""
# Binding for espeak_CompileDictionary (no return value).
# The C prototype in speak_lib.h has a third argument, `int flags` (bit 0:
# include source line information for debugging).  It is declared here with
# a default of 0 so the callee never reads an argument that was not passed;
# existing two-argument calls keep working.
CompileDictionary = cfunc('espeak_CompileDictionary', dll, None,
                          ('path', c_char_p, 1),
                          ('log', c_void_p, 1),
                          ('flags', c_int, 1, 0))
# Usage text attached as the binding's __doc__.
CompileDictionary.__doc__ = """Compile pronunciation dictionary for a language which corresponds to the currently
selected voice. The required voice should be selected before calling this function.
path: The directory which contains the language's '_rules' and '_list' files.
'path' should end with a path separator character ('/').
log: Stream for error reports and statistics information. If log=NULL then stderr will be used."""
class VOICE(Structure):
    """ctypes layout of an eSpeak voice record.

    The field order defines the binary layout and must not be changed.
    """
    _fields_ = [
        ('name', c_char_p),
        ('languages', c_char_p),
        ('identifier', c_char_p),
        ('gender', c_ubyte),
        ('age', c_ubyte),
        ('variant', c_ubyte),
        ('xx1', c_ubyte),
        ('score', c_int),
        ('spare', c_void_p),
    ]

    def __repr__(self):
        """Return ``ClassName(field=value,...)`` listing every field in order."""
        parts = ['%s=%r' % (field_name, getattr(self, field_name))
                 for field_name, _ctype in self._fields_]
        return '%s(%s)' % (self.__class__.__name__, ','.join(parts))
# Raw binding for espeak_ListVoices: takes a pointer to a VOICE (or None)
# and returns a pointer to an array of VOICE pointers.  ListVoices() turns
# that array into a Python list.
cListVoices = cfunc('espeak_ListVoices', dll, POINTER(POINTER(VOICE)),
('voice_spec', POINTER(VOICE), 1))
# Usage text attached as the binding's __doc__.
cListVoices.__doc__ = """Reads the voice files from espeak-data/voices and creates an array of espeak_VOICE pointers.
The list is terminated by a NULL pointer
If voice_spec is NULL then all voices are listed.
If voice spec is given, then only the voices which are compatible with the voice_spec
are listed, and they are listed in preference order."""
def ListVoices(voice_spec=None):
    """Return the voices reported by eSpeak as a list of VOICE objects.

    voice_spec: None to list every voice, or a VOICE whose fields act as a
        filter; in that case only compatible voices are returned, in
        preference order.
    """
    voice_ptrs = cListVoices(voice_spec)
    voices = []
    index = 0
    while True:
        entry = voice_ptrs[index]
        if not entry:
            # A NULL pointer marks the end of the array.
            break
        voices.append(entry.contents)
        index += 1
    return voices
# Binding for espeak_SetVoiceByName: one required input C string (`name`,
# passed as bytes), integer status result.
SetVoiceByName = cfunc('espeak_SetVoiceByName', dll, c_int,
('name', c_char_p, 1))
# Usage text attached as the binding's __doc__.
SetVoiceByName.__doc__ = """Searches for a voice with a matching "name" field. Language is not considered.
"name" is a UTF8 string.
Return: EE_OK: operation achieved
EE_BUFFER_FULL: the command can not be buffered;
you may try after a while to call the function again.
EE_INTERNAL_ERROR."""
# Binding for espeak_SetVoiceByProperties: one required input, a pointer to
# a VOICE holding the selection criteria; integer status result.
SetVoiceByProperties = cfunc('espeak_SetVoiceByProperties', dll, c_int,
('voice_spec', POINTER(VOICE), 1))
# Usage text attached as the binding's __doc__.
SetVoiceByProperties.__doc__ = """An espeak_VOICE structure is used to pass criteria to select a voice. Any of the following
fields may be set:
name NULL, or a voice name
languages NULL, or a single language string (with optional dialect), eg. "en-uk", or "en"
gender 0=not specified, 1=male, 2=female
age 0=not specified, or an age in years
variant After a list of candidates is produced, scored and sorted, "variant" is used to index
that list and choose a voice.
variant=0 takes the top voice (i.e. best match). variant=1 takes the next voice, etc"""
# Binding for espeak_GetCurrentVoice: takes no arguments and returns a
# pointer to a VOICE (read the record through `.contents`).
GetCurrentVoice = cfunc('espeak_GetCurrentVoice', dll, POINTER(VOICE),
)
# Usage text attached as the binding's __doc__.
GetCurrentVoice.__doc__ = """Returns the espeak_VOICE data for the currently selected voice.
This is not affected by temporary voice changes caused by SSML elements such as <voice> and <s>"""
# Binding for espeak_Cancel: no arguments, integer status result.
Cancel = cfunc('espeak_Cancel', dll, c_int)
# Usage text attached as the binding's __doc__.
Cancel.__doc__ = """Stop immediately synthesis and audio output of the current text. When this
function returns, the audio output is fully stopped and the synthesizer is ready to
synthesize a new message.
Return: EE_OK: operation achieved
EE_INTERNAL_ERROR."""
# Binding for espeak_IsPlaying: no arguments, integer result used as a boolean.
IsPlaying = cfunc('espeak_IsPlaying', dll, c_int)
# Usage text attached as the binding's __doc__.
IsPlaying.__doc__ = """Returns 1 if audio is played, 0 otherwise."""
# Binding for espeak_Synchronize: no arguments, integer status result.
Synchronize = cfunc('espeak_Synchronize', dll, c_int)
# Usage text attached as the binding's __doc__.
Synchronize.__doc__ = """This function returns when all data have been spoken.
Return: EE_OK: operation achieved
EE_INTERNAL_ERROR."""
# Binding for espeak_Terminate: no arguments, integer status result.
Terminate = cfunc('espeak_Terminate', dll, c_int)
# Usage text attached as the binding's __doc__.
Terminate.__doc__ = """last function to be called.
Return: EE_OK: operation achieved
EE_INTERNAL_ERROR."""
# Binding for espeak_Info: returns a C string (bytes in Python).  The single
# pointer argument defaults to 0 (NULL), so Info() can be called without arguments.
Info = cfunc('espeak_Info', dll, c_char_p, ('ptr', c_void_p, 1, 0))
# Usage text attached as the binding's __doc__.
Info.__doc__ = """Returns the version number string.
The parameter is for future use, and should be set to NULL"""
if __name__ == '__main__':
    # Smoke test: speak one sentence through eSpeak's own audio playback and
    # print what the synthesis callback receives.
    def synth_cb(wav, numsample, events):
        """Print the sample count, then the type code of every queued event."""
        print(numsample, end="")
        i = 0
        # The event array ends with an EVENT_LIST_TERMINATED entry.
        while events[i].type != EVENT_LIST_TERMINATED:
            print(events[i].type, end="")
            i += 1
        return 0  # 0 = continue synthesis, 1 = abort

    samplerate = Initialize(output=AUDIO_OUTPUT_PLAYBACK)
    if samplerate == EE_INTERNAL_ERROR:
        # Initialize returns the sample rate in Hz, or EE_INTERNAL_ERROR (-1).
        raise RuntimeError("espeak_Initialize failed")
    SetSynthCallback(synth_cb)
    # The text parameter is a C `char *`, so it has to be passed as bytes.
    s = b'This is a test, only a test. '
    print(Synth(s))
    # Synth only queues the text; wait here until playback has finished.
    while IsPlaying():
        time.sleep(0.1)