GroupLang · agentmarketbot · Jan 9, 2025 · Jan 9, 2025
diff --git a/bot_handlers.py b/bot_handlers.py
@@ -1,15 +1,19 @@
 import logging
 import os
 from typing import Dict, Any
-from services import AWSServices, AudioTranscriber, TextSummarizer
+from services import AWSServices, TranscriptionServiceFactory, TextSummarizer
 from utils.telegram_utils import send_message, get_telegram_file_url
 from utils.message_utils import format_response, create_tip_button
 
 logger = logging.getLogger(__name__)
 
 # Initialize services
 aws_services = AWSServices()
-audio_transcriber = AudioTranscriber(aws_services)
+transcription_service = TranscriptionServiceFactory.create_service(
+    os.environ.get('TRANSCRIPTION_SERVICE', 'aws'),
+    aws_services=aws_services,
+    api_key=os.environ.get('OPENAI_API_KEY')
+)
 text_summarizer = TextSummarizer(os.environ.get('MARKETROUTER_API_KEY'))
 
 def handle_update(update: Dict[str, Any]) -> None:
@@ -30,7 +34,7 @@ def handle_voice_message(message: Dict[str, Any], chat_id: int) -> None:
         file_id = message['voice']['file_id']
         file_url = get_telegram_file_url(file_id)
 
-        transcription = audio_transcriber.transcribe_audio(file_url)
+        transcription = transcription_service.transcribe_audio(file_url)
         summary, conversation_id = text_summarizer.summarize_text(transcription)
 
         logger.info(f"Processed voice message: file_id={file_id}, "

diff --git a/config.py b/config.py
@@ -6,3 +6,5 @@ class Config:
     MARKETROUTER_API_KEY = os.environ.get('MARKETROUTER_API_KEY')
     AWS_ACCESS_KEY_ID = os.environ.get('AWS_ACCESS_KEY_ID')
     AWS_SECRET_ACCESS_KEY = os.environ.get('AWS_SECRET_ACCESS_KEY')
+    OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
+    TRANSCRIPTION_SERVICE = os.environ.get('TRANSCRIPTION_SERVICE', 'aws')  # 'aws' or 'openai'
diff --git a/services.py b/services.py
@@ -4,11 +4,25 @@
 import time
 import uuid
 import logging
+import abc
+import openai
 from io import BytesIO
 from botocore.exceptions import ClientError
 
 logger = logging.getLogger(__name__)
 
+class TranscriptionService(abc.ABC):
+    """Base class for transcription backends; subclasses implement transcribe_audio."""
+    @abc.abstractmethod
+    def transcribe_audio(self, file_url: str) -> str:
+        """Transcribe audio from the given URL."""
+
+    def _download_audio(self, file_url: str) -> bytes:
+        """Download audio from the URL; raises on an HTTP error status or a timeout."""
+        response = requests.get(file_url, timeout=30)  # never wait forever on a stalled peer
+        response.raise_for_status()
+        return response.content
+
 class AWSServices:
     def __init__(self, region_name='us-east-1'):
         self.region_name = region_name
@@ -50,7 +64,7 @@ def start_transcription_job(self, job_name, media_uri, media_format='ogg', langu
     def get_transcription_job_status(self, job_name):
         return self.transcribe_client.get_transcription_job(TranscriptionJobName=job_name)
 
-class AudioTranscriber:
+class AWSTranscriber(TranscriptionService):
     def __init__(self, aws_services: AWSServices):
         self.aws_services = aws_services
         self.bucket_name = 'audio-transcribe-temp'
@@ -77,11 +91,6 @@ def transcribe_audio(self, file_url: str) -> str:
             logger.error(f"An error occurred: {e}")
             raise
 
-    def _download_audio(self, file_url: str) -> bytes:
-        response = requests.get(file_url)
-        response.raise_for_status()
-        return response.content
-
     def _wait_for_transcription(self, job_name: str) -> str:
         while True:
             status = self.aws_services.get_transcription_job_status(job_name)
@@ -95,6 +104,49 @@ def _wait_for_transcription(self, job_name: str) -> str:
         else:
             raise Exception("Transcription failed")
 
+class OpenAITranscriber(TranscriptionService):
+    """Transcription backend that calls OpenAI with the ``whisper-1`` model."""
+
+    def __init__(self, api_key: str):
+        # Keep the key on the instance and also set it module-wide on openai.
+        self.api_key = openai.api_key = api_key
+
+    def transcribe_audio(self, file_url: str) -> str:
+        """Download the audio at ``file_url`` and return OpenAI's text response."""
+        request_options = {"model": "whisper-1", "response_format": "text"}
+        # Any failure is logged here and then re-raised unchanged.
+        try:
+            with BytesIO(self._download_audio(file_url)) as upload:
+                upload.name = "audio.ogg"  # OpenAI needs a filename
+                return openai.Audio.transcribe(file=upload, **request_options)
+        except Exception as error:
+            message = f"An error occurred during OpenAI transcription: {error}"
+            logger.error(message)
+            raise
+
+class TranscriptionServiceFactory:
+    @staticmethod
+    def create_service(service_type: str, **kwargs) -> TranscriptionService:
+        """Create a transcription service based on the specified type.
+
+        Args:
+            service_type: Either 'aws' or 'openai' (case-insensitive).
+            **kwargs: aws_services (AWSServices) for AWS; api_key (str) for OpenAI.
+        Raises:
+            ValueError: Unknown type, or the selected backend's argument is missing or empty.
+        """
+        backend = service_type.lower()
+        if backend == 'aws':
+            if kwargs.get('aws_services') is None:
+                raise ValueError("aws_services is required for AWS transcription")
+            return AWSTranscriber(kwargs['aws_services'])
+        if backend == 'openai':
+            # A present-but-None key (e.g. unset env var) must fail here, not on first use.
+            if not kwargs.get('api_key'):
+                raise ValueError("api_key is required for OpenAI transcription")
+            return OpenAITranscriber(kwargs['api_key'])
+        raise ValueError(f"Unknown service type: {service_type}")
+
 class TextSummarizer:
     def __init__(self, api_key: str):
         self.api_key = api_key
@@ -186,4 +238,4 @@ def _get_headers(self) -> Dict[str, str]:
         return {
             'Content-Type': 'application/json',
             'x-api-key': self.api_key
-        }
+        }