-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdeepgramCommunication.py
100 lines (80 loc) · 3.29 KB
/
deepgramCommunication.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import copy
import logging
import os

from deepgram import DeepgramClient, SpeakOptions, PrerecordedOptions, FileSource
# Module-wide logger, named after this module.
logger = logging.getLogger(__name__)

# Options passed along with pre-recorded transcription requests.
preRecordedOptions = PrerecordedOptions(model="nova-2", smart_format=True)

# Default options for text-to-speech requests.
speakOptions = SpeakOptions(
    model="aura-asteria-en",
    encoding="linear16",
    container="wav",
)
class DeepgramAssistant:
    """
    A small wrapper around the Deepgram client for speech synthesis and
    transcription of audio files.

    Attributes:
        client (DeepgramClient): The Deepgram client used for communication.
        Debug (bool): When true, API responses are also printed to stdout.

    Methods:
        __init__(self, DEBUG=False, voice="aura-asteria-en"): Initializes a new instance of the DeepgramAssistant class.
        changeVoice(self, voice): Changes the voice model used for speech synthesis.
        speak(self, message, filename='./talk.wav'): Generates speech from the given message and saves it to a file.
        listen(self, filename): Transcribes the audio from the given file.
    """

    def __init__(self, DEBUG=False, voice="aura-asteria-en"):
        """
        Initializes a new instance of the DeepgramAssistant class.

        The API key is read from the DEEPGRAM_API_KEY environment variable.

        Args:
            DEBUG (bool, optional): A flag indicating whether debug mode is enabled. Defaults to False.
            voice (str, optional): The voice model to use for speech synthesis. Defaults to "aura-asteria-en".
        """
        # Fall back to "" so the client always receives a str, never None,
        # when the variable is unset (assumes the SDK treats "" as "no key
        # supplied" -- TODO confirm against the installed SDK version).
        self.client = DeepgramClient(api_key=os.getenv('DEEPGRAM_API_KEY', ''))
        # Work on a private copy of the module-level defaults: mutating the
        # shared object would silently change the voice of every other
        # assistant living in the same process.
        self._speak_options = copy.copy(speakOptions)
        self._speak_options.model = voice
        self.Debug = DEBUG

    def changeVoice(self, voice):
        """
        Changes the voice model used by this assistant for speech synthesis.

        Args:
            voice (str): The voice model to use.
        """
        logger.info("Changing voice to %s", voice)
        self._speak_options.model = voice

    def speak(self, message, filename='./talk.wav'):
        """
        Generates speech from the given message and saves it to a file.

        Args:
            message (str): The message to convert to speech.
            filename (str, optional): The filename to save the speech to. Defaults to './talk.wav'.

        Returns:
            response: The response object from the Deepgram API.
        """
        logger.info("Speaking: %s", message)
        speakSource = {"text": message}
        response = self.client.speak.v('1').save(filename, speakSource, self._speak_options)
        self._report("Speech saved response:", response)
        return response

    def listen(self, filename):
        """
        Transcribes the audio from the given file.

        Args:
            filename (str): The filename of the audio file to transcribe.

        Returns:
            response: The response object from the Deepgram API.
        """
        logger.info("Listening to %s", filename)
        # The whole file is read into memory and sent as a single buffer.
        with open(filename, "rb") as file:
            buffer_data = file.read()
        payload: FileSource = {
            "buffer": buffer_data,
        }
        response = self.client.listen.prerecorded.v("1").transcribe_file(payload, preRecordedOptions)
        self._report("Transcription response:", response)
        return response

    def _report(self, label, response):
        """Log the JSON form of `response` under `label`; also print it when Debug is set."""
        # Skip the JSON rendering entirely when nobody will see the result.
        if not (self.Debug or logger.isEnabledFor(logging.INFO)):
            return
        rendered = response.to_json(indent=2)
        logger.info("%s\n%s", label, rendered)
        if self.Debug:
            print(rendered)