main.py
import logging
import queue
from io import BytesIO

import torch
import torchaudio as ta
from df.enhance import enhance, init_df
from faster_whisper import WhisperModel
from ffmpeg import FFmpeg
from telegram.ext import Updater, MessageHandler, Filters

import config
import utils
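

# A queued transcription job: stores which chat/message to reply to and
# which Telegram file_id to download.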
class Task:
    def __init__(self, chat_id, message_id, file_id=None):
        self.chat_id = chat_id
        self.message_id = message_id
        self.file_id = file_id

    def download_file(self):
        tg_file = updater.bot.get_file(self.file_id)
        file = BytesIO()
        tg_file.download(out=file)
        return file.getvalue()
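

# Handler for voice messages, audio files and audio documents: enqueue a Task
# and tell the user their position in the queue.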
def on_audio(update, context):
    new_task = Task(update.effective_chat.id, update.message.message_id)
    if update.message.voice:
        new_task.file_id = update.message.voice.file_id
    elif update.message.audio:
        new_task.file_id = update.message.audio.file_id
    else:
        new_task.file_id = update.message.document.file_id
    task_queue.put(new_task)
    context.bot.send_message(
        chat_id=new_task.chat_id,
        reply_to_message_id=new_task.message_id,
        text=f'Your file has been queued for processing! Your position in the queue: {task_queue.qsize()}'
    )
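

# Fallback handler for any other message: briefly explain what the bot does.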
def info(update, context):
    context.bot.send_message(
        chat_id=update.effective_chat.id,
        text='This is a speech recognition bot. Send a voice message or an audio file to get started.'
    )
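

# Helpers for converting between raw WAV bytes and torchaudio tensors.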
def load_audio_bytes(b):
    file = BytesIO(b)
    audio, _ = ta.load(file)
    return audio.contiguous()


def save_audio_bytes(audio):
    file = BytesIO()
    audio = torch.as_tensor(audio)
    if audio.ndim == 1:
        audio.unsqueeze_(0)
    if audio.dtype != torch.int16:
        audio = (audio * (1 << 15)).to(torch.int16)
    ta.save(file, audio, df_state.sr(), format='wav')
    file.seek(0)
    return file
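

# Bot setup: route audio-like updates to on_audio, everything else to info.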
updater = Updater(config.TG_BOT_TOKEN, use_context=True)
updater.dispatcher.add_handler(MessageHandler(
    Filters.voice | Filters.audio | Filters.document.category('audio/'),
    on_audio
))
updater.dispatcher.add_handler(MessageHandler(Filters.all, info))
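
# Shared work queue, the DeepFilterNet noise-suppression model and the
# faster-whisper speech recognizer.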
task_queue = queue.Queue()
df_model, df_state, _ = init_df()
whisper = WhisperModel(config.WHISPER_MODEL, device=config.WHISPER_DEVICE)

# Normalize the loudness and resample to the sample rate DeepFilterNet expects
ffmpeg = (
    FFmpeg()
    .option('y')
    .input('pipe:0')
    .output(
        'pipe:1',
        {'codec:a': 'pcm_s16le', 'filter:a': 'loudnorm'},
        ar=df_state.sr(),
        f='wav'
    )
)
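
# Polling runs in background threads; the main thread drains the queue:
# download -> normalize/resample -> denoise -> transcribe -> reply.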
updater.start_polling()

while True:
    current_task = task_queue.get()
    try:
        audio_bytes = current_task.download_file()
        normalized = ffmpeg.execute(audio_bytes)
        audio_tensor = load_audio_bytes(normalized)
        cleaned_tensor = enhance(df_model, df_state, audio_tensor)
        cleaned = save_audio_bytes(cleaned_tensor)
        segments, transcription_info = whisper.transcribe(cleaned, vad_filter=True)
        segments = [segment.text for segment in segments]
        utils.remove_hallucinations(segments, transcription_info.language)
        text = ''.join(segments).strip()
        updater.bot.send_message(
            chat_id=current_task.chat_id,
            reply_to_message_id=current_task.message_id,
            text=f'✅ Recognized speech:\n{text}'
        )
    except Exception:
        logging.exception('bot')
        updater.bot.send_message(
            chat_id=current_task.chat_id,
            reply_to_message_id=current_task.message_id,
            text='❌ Failed to process the file'
        )