diff --git a/packages/app/app/[organization]/layout.tsx b/packages/app/app/[organization]/layout.tsx index 0adbf8dcd..89bbb2b1d 100644 --- a/packages/app/app/[organization]/layout.tsx +++ b/packages/app/app/[organization]/layout.tsx @@ -33,6 +33,7 @@ const Layout = async ({ const userData = await fetchUserAction(); + console.log(organization); if (!organization) { return NotFound(); } diff --git a/packages/app/app/studio/[organization]/(no-side-bar)/clips/[stageId]/ClipContext.tsx b/packages/app/app/studio/[organization]/(no-side-bar)/clips/[stageId]/ClipContext.tsx index 1e7eea736..5bbb6ab99 100644 --- a/packages/app/app/studio/[organization]/(no-side-bar)/clips/[stageId]/ClipContext.tsx +++ b/packages/app/app/studio/[organization]/(no-side-bar)/clips/[stageId]/ClipContext.tsx @@ -103,10 +103,8 @@ export const ClipProvider = ({ organizationId: string; clipUrl: string; }) => { - const { handleTermChange, searchParams } = useSearchParams(); + const { searchParams } = useSearchParams(); - const start = searchParams?.get('start'); - const end = searchParams?.get('end'); const [playbackStatus, setPlaybackStatus] = useState( null ); diff --git a/packages/app/app/studio/[organization]/(no-side-bar)/clips/[stageId]/page.tsx b/packages/app/app/studio/[organization]/(no-side-bar)/clips/[stageId]/page.tsx index 0497c70cf..0e8bb993c 100644 --- a/packages/app/app/studio/[organization]/(no-side-bar)/clips/[stageId]/page.tsx +++ b/packages/app/app/studio/[organization]/(no-side-bar)/clips/[stageId]/page.tsx @@ -1,9 +1,7 @@ import { fetchOrganization } from '@/lib/services/organizationService'; import { fetchAllSessions, - fetchAsset, - fetchSession, - sessionImport, + fetchSession } from '@/lib/services/sessionService'; import { fetchStage, fetchStageRecordings } from '@/lib/services/stageService'; import { ClipsPageParams } from '@/lib/types'; @@ -31,12 +29,11 @@ const fetchVideoDetails = async ( const stageRecordings = await fetchStageRecordings({ streamId }); if (!stageRecordings?.recordings[0]) return null; - return { - videoSrc: stageRecordings.recordings[0].recordingUrl, + videoSrc: `https://livepeercdn.studio/hls/${liveStage.streamSettings?.playbackId}/index.m3u8`, type: 'livepeer', name: liveStage.name, - words: liveStage.transcripts?.text, + words: liveStage.transcripts?.chunks, liveRecording: stageRecordings.recordings[0], }; } @@ -47,13 +44,13 @@ const fetchVideoDetails = async ( const stage = await fetchStage({ stage: session.stageId as string }); if (!stage?.streamSettings?.playbackId) return null; - + console.log('session', session.transcripts?.chunks); const videoSrc = await getVideoUrlAction(session); return { videoSrc, type: 'livepeer', name: session.name, - words: session.transcripts?.subtitleUrl, + words: session.transcripts?.chunks, }; } @@ -65,6 +62,7 @@ const fetchVideoDetails = async ( videoSrc: stage.source.m3u8Url, type: stage.source.type, name: stage.name, + words: stage.transcripts?.chunks, }; } @@ -106,13 +104,6 @@ const ClipsConfig = async ({ params, searchParams }: ClipsPageParams) => { clipUrl={videoDetails.videoSrc} >
- {/*
- {words?.split('\n').map((word) => ( -
- {word} -
- ))} -
*/}
{ stageSessions={stageSessions.sessions} organizationId={organizationId} animations={animations.sessions} + words={videoDetails.words} />
diff --git a/packages/app/app/studio/[organization]/(no-side-bar)/clips/[stageId]/sidebar/Transcipts/index.tsx b/packages/app/app/studio/[organization]/(no-side-bar)/clips/[stageId]/sidebar/Transcipts/index.tsx new file mode 100644 index 000000000..f0d2d8727 --- /dev/null +++ b/packages/app/app/studio/[organization]/(no-side-bar)/clips/[stageId]/sidebar/Transcipts/index.tsx @@ -0,0 +1,42 @@ +"use client" +import { useClipContext } from '../../ClipContext'; +import { Button } from '@/components/ui/button'; +const Transcripts = ({ + words, +}: { + words: { word: string; start: number }[]; +}) => { + const { currentTime, videoRef } = useClipContext(); + + // Helper function to determine if a word should be highlighted + const isWordActive = ( + word: { word: string; start: number }, + currentTime: number + ) => { + // You might want to adjust this logic based on your requirements + return word.start <= currentTime && word.start + 1 > currentTime; + }; + + return ( +
+ + {words?.map((word, index) => ( + { + if (videoRef.current) { + videoRef.current.currentTime = word.start; + } + }} + > + {word.word} + + ))} +
+ ); +}; + +export default Transcripts; diff --git a/packages/app/app/studio/[organization]/(no-side-bar)/clips/[stageId]/sidebar/clips/Clip.tsx b/packages/app/app/studio/[organization]/(no-side-bar)/clips/[stageId]/sidebar/clips/Clip.tsx index e3a5639bf..b6e6874f7 100644 --- a/packages/app/app/studio/[organization]/(no-side-bar)/clips/[stageId]/sidebar/clips/Clip.tsx +++ b/packages/app/app/studio/[organization]/(no-side-bar)/clips/[stageId]/sidebar/clips/Clip.tsx @@ -4,12 +4,11 @@ import { Card, CardContent } from '@/components/ui/card'; import { fetchAsset } from '@/lib/services/sessionService'; import { IExtendedSession } from '@/lib/types'; import { formatDate } from '@/lib/utils/time'; -import { Asset } from 'livepeer/models/components'; import { useRouter } from 'next/navigation'; import { useEffect, useState } from 'react'; import { ProcessingStatus } from 'streameth-new-server/src/interfaces/session.interface'; import Preview from './Preview'; - +import { Asset } from 'livepeer/models/components/asset'; export default function Clip({ session }: { session: IExtendedSession }) { const { name, coverImage, assetId } = session; const [asset, setAsset] = useState(null); diff --git a/packages/app/app/studio/[organization]/(no-side-bar)/clips/[stageId]/sidebar/index.tsx b/packages/app/app/studio/[organization]/(no-side-bar)/clips/[stageId]/sidebar/index.tsx index 14e15c478..7105a0619 100644 --- a/packages/app/app/studio/[organization]/(no-side-bar)/clips/[stageId]/sidebar/index.tsx +++ b/packages/app/app/studio/[organization]/(no-side-bar)/clips/[stageId]/sidebar/index.tsx @@ -7,17 +7,24 @@ import CreateClipButton from '../topBar/CreateClipButton'; import AddOrEditMarkerForm from './markers/AddOrEditMarkerForm'; import { IExtendedSession } from '@/lib/types'; import ImportMarkersForm from './markers/ImportMarkersForm'; +import Transcripts from './Transcipts'; export default function Sidebar({ organizationId, stageSessions, liveRecordingId, animations, + words, }: { organizationId: string; stageSessions: IExtendedSession[]; liveRecordingId?: string; animations: IExtendedSession[]; + words?: { + word: string; + start: number; + end: number; + }[]; }) { const { isCreatingClip, isAddingOrEditingMarker, isImportingMarkers } = useClipContext(); @@ -57,9 +64,10 @@ export default function Sidebar({ ) )} - + Markers Clips + {words && Words} {} @@ -67,6 +75,11 @@ export default function Sidebar({ + {words && ( + + + + )} ); diff --git a/packages/app/app/studio/[organization]/(root)/library/[session]/components/SessionTranscriptions.tsx b/packages/app/app/studio/[organization]/(root)/library/[session]/components/SessionTranscriptions.tsx index f43d7165c..ef5e12bc6 100644 --- a/packages/app/app/studio/[organization]/(root)/library/[session]/components/SessionTranscriptions.tsx +++ b/packages/app/app/studio/[organization]/(root)/library/[session]/components/SessionTranscriptions.tsx @@ -44,27 +44,41 @@ const SessionTranscriptions = ({ }); }; - if (transcriptionState === TranscriptionStatus.processing) { - return ( -
- Processing - transcription...{' '} -

router.refresh()} - > - -

-
- ); - } + // if (transcriptionState === TranscriptionStatus.processing) { + // return ( + //
+ // Processing + // transcription...{' '} + //

router.refresh()} + // > + // + //

+ //
+ // ); + // } if ( transcriptionState === TranscriptionStatus.completed && videoTranscription ) { - return ; + return ( +
+ + +
+ ); } if (transcriptionState === TranscriptionStatus.failed) { diff --git a/packages/app/app/studio/[organization]/(root)/library/[session]/page.tsx b/packages/app/app/studio/[organization]/(root)/library/[session]/page.tsx index af872c482..1c240c830 100644 --- a/packages/app/app/studio/[organization]/(root)/library/[session]/page.tsx +++ b/packages/app/app/studio/[organization]/(root)/library/[session]/page.tsx @@ -29,7 +29,6 @@ const EditSession = async ({ params, searchParams }: studioPageParams) => { session: params.session, }); - console.log(session?.transcripts?.chunks[0]); if (!session?.playbackId || !organization) return notFound(); return ( diff --git a/packages/app/app/studio/[organization]/(root)/library/components/TableCells.tsx b/packages/app/app/studio/[organization]/(root)/library/components/TableCells.tsx index 4f11da293..855916dd3 100644 --- a/packages/app/app/studio/[organization]/(root)/library/components/TableCells.tsx +++ b/packages/app/app/studio/[organization]/(root)/library/components/TableCells.tsx @@ -106,7 +106,7 @@ const TableCells = async ({
- {!isDisabled && item.type === 'livestream' && ( + {!isDisabled && ( // item.createdAt && // new Date(item.createdAt).getTime() > // Date.now() - 7 * 24 * 60 * 60 * 1000 && ( diff --git a/packages/app/lib/actions/livepeer.ts b/packages/app/lib/actions/livepeer.ts index 68f92eb62..a5c6e0597 100644 --- a/packages/app/lib/actions/livepeer.ts +++ b/packages/app/lib/actions/livepeer.ts @@ -22,6 +22,9 @@ export const getVideoUrlAction = async ( session: IExtendedSession ): Promise => { try { + if (session.playback?.videoUrl) { + return session.playback.videoUrl; + } if (session.assetId) { const asset = await fetchAsset({ assetId: session.assetId }); if (asset?.playbackUrl) { diff --git a/packages/app/lib/actions/sessions.ts b/packages/app/lib/actions/sessions.ts index 05eff5fce..85775e448 100644 --- a/packages/app/lib/actions/sessions.ts +++ b/packages/app/lib/actions/sessions.ts @@ -14,6 +14,7 @@ import { saveSessionImport, generateTranscriptions, uploadSessionToSocialsRequest, + extractHighlights, } from '../services/sessionService'; import { ISession, @@ -280,3 +281,13 @@ export const generateTranscriptionActions = async ({ return null; } }; + + +export const extractHighlightsAction = async ({ + sessionId, +}: { + sessionId: string; +}) => { + const res = await extractHighlights({ sessionId }); + return res; +}; diff --git a/packages/app/lib/services/sessionService.ts b/packages/app/lib/services/sessionService.ts index 71bcba8af..c4a10cbf3 100644 --- a/packages/app/lib/services/sessionService.ts +++ b/packages/app/lib/services/sessionService.ts @@ -7,7 +7,7 @@ import { import { apiUrl } from '@/lib/utils/utils'; import { ISession } from 'streameth-new-server/src/interfaces/session.interface'; import { revalidatePath } from 'next/cache'; -import { Asset } from 'livepeer/models/components'; +import { Asset } from 'livepeer/models/components/asset'; import FuzzySearch from 'fuzzy-search'; import { fetchClient } from './fetch-client'; @@ -563,3 +563,18 @@ export const generateTranscriptions = async ({ throw e; } }; + +export const extractHighlights = async ({ + sessionId, +}: { + sessionId: string; +}) => { + try { + const response = await fetchClient(`${apiUrl()}/sessions/${sessionId}/highlights`, { + method: 'POST', + }); + } catch (e) { + console.log('error in extractHighlights', e); + throw e; + } +}; diff --git a/packages/server/package.json b/packages/server/package.json index 9a6068871..444be7ef9 100644 --- a/packages/server/package.json +++ b/packages/server/package.json @@ -40,6 +40,7 @@ "fuse.js": "^7.0.0", "google-auth-library": "^9.6.3", "googleapis": "^133.0.0", + "gpt-3-encoder": "^1.1.4", "helmet": "^7.1.0", "hpp": "^0.2.3", "http-errors": "^2.0.0", diff --git a/packages/server/src/controllers/session.controller.ts b/packages/server/src/controllers/session.controller.ts index 88bea2067..e6124725d 100644 --- a/packages/server/src/controllers/session.controller.ts +++ b/packages/server/src/controllers/session.controller.ts @@ -199,4 +199,18 @@ export class SessionController extends Controller { await this.sessionService.deleteOne(sessionId); return SendApiResponse('deleted'); } + + /** + * @summary Extract highlights from session + */ + @SuccessResponse('200') + @Post('{sessionId}/highlights') + async extractHighlights( + @Path() sessionId: string, + ): Promise> { + const highlights = await this.sessionService.extractHighlights(sessionId); + return SendApiResponse(highlights.content); + } } + + diff --git a/packages/server/src/databases/index.ts b/packages/server/src/databases/index.ts index 35d8f895a..6a13ea3ff 100644 --- a/packages/server/src/databases/index.ts +++ b/packages/server/src/databases/index.ts @@ -8,9 +8,9 @@ console.log('Database:', name); console.log('Password length:', password?.length); export const dbConnection = { - url: `mongodb://${user}:${password}@${host}/${name}?authSource=admin&retryWrites=true&w=majority`, + //rl: `mongodb://${user}:${password}@${host}/${name}?authSource=admin&retryWrites=true&w=majority`, // For local development use this url - // url: `mongodb+srv://${user}:${password}@${host}/${name}?authSource=admin`, + url: `mongodb+srv://${user}:${password}@${host}/${name}?authSource=admin`, options: { useNewUrlParser: true, useUnifiedTopology: true, diff --git a/packages/server/src/routes/routes.ts b/packages/server/src/routes/routes.ts index faef9728b..7c62e20f5 100644 --- a/packages/server/src/routes/routes.ts +++ b/packages/server/src/routes/routes.ts @@ -2734,6 +2734,36 @@ export function RegisterRoutes(app: Router,opts?:{multer?:ReturnType(SessionController)), + ...(fetchMiddlewares(SessionController.prototype.extractHighlights)), + + async function SessionController_extractHighlights(request: ExRequest, response: ExResponse, next: any) { + const args: Record = { + sessionId: {"in":"path","name":"sessionId","required":true,"dataType":"string"}, + }; + + // WARNING: This file was auto-generated with tsoa. Please do not modify it. Re-run tsoa to re-generate this file: https://github.com/lukeautry/tsoa + + let validatedArgs: any[] = []; + try { + validatedArgs = templateService.getValidatedArgs({ args, request, response }); + + const controller = new SessionController(); + + await templateService.apiHandler({ + methodName: 'extractHighlights', + controller, + response, + next, + validatedArgs, + successStatus: 200, + }); + } catch (err) { + return next(err); + } + }); + // WARNING: This file was auto-generated with tsoa. Please do not modify it. Re-run tsoa to re-generate this file: https://github.com/lukeautry/tsoa app.post('/schedule/import', authenticateMiddleware([{"jwt":["org"]}]), ...(fetchMiddlewares(ScheduleImporterController)), diff --git a/packages/server/src/services/session.service.ts b/packages/server/src/services/session.service.ts index 656452a6d..12da4df5b 100644 --- a/packages/server/src/services/session.service.ts +++ b/packages/server/src/services/session.service.ts @@ -19,6 +19,7 @@ import Organization from '@models/organization.model'; import Session from '@models/session.model'; import Stage from '@models/stage.model'; import State from '@models/state.model'; +import { ChatAPI } from '@utils/ai.chat'; import { getAsset, getDownloadUrl, getStreamRecordings } from '@utils/livepeer'; import { refreshAccessToken } from '@utils/oauth'; import { sessionTranscriptionsQueue, videoUploadQueue } from '@utils/redis'; @@ -401,4 +402,52 @@ export default class SessionService { const query = isObjectId ? { _id: id } : { slug: id }; return query; } + + async extractHighlights(sessionId: string) { + const session = await this.get(sessionId); + + if (!session.transcripts) { + throw new HttpException(400, 'Session has no transcripts'); + } + + const chunks = session.transcripts.chunks; + const chunkSlices = [ + chunks.slice(0, Math.floor(chunks.length / 3)), + chunks.slice( + Math.floor(chunks.length / 3), + Math.floor(chunks.length / 3) * 2, + ), + chunks.slice(Math.floor(chunks.length / 3) * 2, chunks.length), + ]; + + for (let i = 0; i < 1; i++) { + const chat = new ChatAPI(); + const highlights = await chat.chat([ + { + role: 'system', + content: ` + You are an expert video editor specializing in creating highlights optimized for social media platforms like TikTok, X, and Instagram. + Task: Extract segments from the transcript that are 30 to 120 seconds long and are the most engaging and impactful moments of the event that are related to ethereum technology. + Input: You will receive an array of words with timestamps from the English transcript. + Output: Return a JSON array of objects with the following structure: + { + "start": number, // Timestamp when highlight begins + "end": number, // Timestamp when highlight ends + "full_transcript": string // Complete transcript of the highlighted segment + } +start-ends should be 60 to 120 seconds long + Guidelines: + - Select engaging, impactful moments that will resonate on social media + - Each highlight should be 60-120 seconds long + - Focus on key technical insights, announcements, or memorable quotes + - Ensure the selected segments are self-contained and make sense standalone`, + }, + { + role: 'user', + content: `Here is the transcript: ${chunkSlices[i]}`, + }, + ]); + console.log(highlights); + } + } } diff --git a/packages/server/src/services/stage.service.ts b/packages/server/src/services/stage.service.ts index f031cc787..88cd72090 100644 --- a/packages/server/src/services/stage.service.ts +++ b/packages/server/src/services/stage.service.ts @@ -226,4 +226,7 @@ export default class StageService { }); return stream; } -} \ No newline at end of file + + +} + diff --git a/packages/server/src/swagger/swagger.json b/packages/server/src/swagger/swagger.json index ea622ac2d..82fff1b35 100644 --- a/packages/server/src/swagger/swagger.json +++ b/packages/server/src/swagger/swagger.json @@ -6359,6 +6359,38 @@ } } }, + "/sessions/{sessionId}/highlights": { + "post": { + "operationId": "ExtractHighlights", + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/IStandardResponse_void_" + } + } + } + } + }, + "summary": "Extract highlights from session", + "tags": [ + "Session" + ], + "security": [], + "parameters": [ + { + "in": "path", + "name": "sessionId", + "required": true, + "schema": { + "type": "string" + } + } + ] + } + }, "/schedule/import": { "post": { "operationId": "ImportSchdeule", diff --git a/packages/server/src/utils/ai.chat.ts b/packages/server/src/utils/ai.chat.ts new file mode 100644 index 000000000..735a18b58 --- /dev/null +++ b/packages/server/src/utils/ai.chat.ts @@ -0,0 +1,30 @@ +import OpenAI from 'openai'; +import { config } from '@config'; +import { ChatCompletionMessageParam } from 'openai/resources/chat/completions'; + +export class ChatAPI { + private openai: OpenAI; + private maxTokens: number; + + constructor(maxTokens: number = 12800) { + this.openai = new OpenAI({ + apiKey: config.openai.apiKey, + }); + this.maxTokens = maxTokens; + } + + async chat(messages: ChatCompletionMessageParam[]) { + + + + + const completion = await this.openai.chat.completions.create({ + model: 'gpt-4o-mini', + messages, + response_format: { type: 'json_object' }, + }); + return completion.choices[0].message; + } + + +} diff --git a/packages/server/src/utils/whisper.ts b/packages/server/src/utils/ai.transcribes.ts similarity index 100% rename from packages/server/src/utils/whisper.ts rename to packages/server/src/utils/ai.transcribes.ts diff --git a/packages/server/workers/clips/index.ts b/packages/server/workers/clips/index.ts index c1fcef02e..d1029b181 100644 --- a/packages/server/workers/clips/index.ts +++ b/packages/server/workers/clips/index.ts @@ -64,20 +64,29 @@ const processClip = async (data: IClip) => { } const masterContent = await masterResponse.text(); console.log('✅ Successfully fetched master playlist'); - // 2. Find the 1080p variant console.log('🔍 Searching for 1080p variant in master playlist'); const linesMaster = masterContent.split('\n'); let variantUrl = ''; + let maxBandwidth = -1; for (let i = 0; i < linesMaster.length; i++) { - if (linesMaster[i].includes('1080p0')) { - variantUrl = linesMaster[i + 1].trim(); - break; + if (linesMaster[i].startsWith('#EXT-X-STREAM-INF')) { + // Parse bandwidth from the stream info + const bandwidthMatch = linesMaster[i].match(/BANDWIDTH=(\d+)/); + if (bandwidthMatch) { + const bandwidth = parseInt(bandwidthMatch[1]); + if (bandwidth > maxBandwidth) { + maxBandwidth = bandwidth; + variantUrl = linesMaster[i + 1].trim(); + } + } } } - variantUrl = clipUrl.replace('index.m3u8', variantUrl); + console.log('Selected variant URL:', variantUrl); + variantUrl = clipUrl.replace('index.m3u8', variantUrl); + console.log('Full variant URL:', variantUrl); if (!variantUrl) { console.error('❌ 1080p variant not found in master playlist'); throw new Error('1080p variant not found in master playlist'); @@ -108,7 +117,7 @@ const processClip = async (data: IClip) => { statusText: manifestResponse.statusText, }); throw new Error( - `Failed to fetch manifest: ${manifestResponse.statusText}`, + `Failed to fetch manifest ${variantUrl}: ${manifestResponse.statusText}`, ); } const manifestContent = await manifestResponse.text(); diff --git a/packages/server/workers/session-transcriptions/index.ts b/packages/server/workers/session-transcriptions/index.ts index 2f09c9094..1e4070757 100644 --- a/packages/server/workers/session-transcriptions/index.ts +++ b/packages/server/workers/session-transcriptions/index.ts @@ -1,7 +1,7 @@ import 'dotenv/config'; import { sessionTranscriptionsQueue } from '@utils/redis'; import ffmpeg from 'fluent-ffmpeg'; -import WhisperAPI from '@utils/whisper'; +import WhisperAPI from '@utils/ai.transcribes'; import { dbConnection } from '@databases/index'; import { connect } from 'mongoose'; import { tmpdir } from 'os'; @@ -10,6 +10,8 @@ import { ISession } from '@interfaces/session.interface'; import { TranscriptionStatus } from '@interfaces/state.interface'; import SessionService from '@services/session.service'; import Session from '@models/session.model'; +import fs from 'fs'; +import path from 'path'; interface SessionTranscriptionsJob { session: { @@ -113,6 +115,122 @@ const updateTranscriptionStatus = async ( }); }; +const splitAudioIntoChunks = async ( + inputPath: string, + maxChunkSize: number = 5 * 1024 * 1024 // Reduced to 5MB to be safe +): Promise => { + const tempDir = tmpdir(); + const chunkPaths: string[] = []; + + // Get audio duration + const duration = await new Promise((resolve, reject) => { + ffmpeg.ffprobe(inputPath, (err, metadata) => { + if (err) reject(err); + resolve(metadata.format.duration || 0); + }); + }); + + // Calculate chunk duration based on file size and total duration + const stats = await fs.promises.stat(inputPath); + console.log('stats', stats.size); + const numberOfChunks = Math.ceil(stats.size / maxChunkSize); + console.log('numberOfChunks', numberOfChunks); + const chunkDuration = duration / numberOfChunks; + console.log('chunkDuration', chunkDuration); + // Split into chunks + for (let i = 0; i < numberOfChunks; i++) { + const startTime = i * chunkDuration; + const chunkPath = join(tempDir, `chunk_${i}_${path.basename(inputPath)}`); + + await new Promise((resolve, reject) => { + ffmpeg(inputPath) + .setStartTime(startTime) + .setDuration(chunkDuration) + .audioCodec('libmp3lame') + .audioBitrate('16k') // Lower bitrate for smaller file size + .audioChannels(1) // Mono audio + .audioFrequency(8000) // 16kHz sample rate + .output(chunkPath) + .on('end', () => resolve()) + .on('error', reject) + .run(); + }); + + // Verify chunk size + const chunkStats = await fs.promises.stat(chunkPath); + console.log('chunkStats', chunkStats.size); + if (chunkStats.size > maxChunkSize) { + console.warn(`Chunk ${i} is too large (${chunkStats.size} bytes). Recreating with lower quality...`); + // If still too large, recreate with even lower quality + await new Promise((resolve, reject) => { + ffmpeg(inputPath) + .setStartTime(startTime) + .setDuration(chunkDuration) + .audioCodec('libmp3lame') + .audioBitrate('16k') // Even lower bitrate + .audioChannels(1) + .audioFrequency(8000) // Lower sample rate + .output(chunkPath) + .on('end', () => resolve()) + .on('error', reject) + .run(); + }); + } + + chunkPaths.push(chunkPath); + } + + return chunkPaths; +}; + +const mergeTranscripts = (chunks: any[]): any => { + let offset = 0; + const mergedWords = chunks.flatMap((chunk, index) => { + // Adjust timestamps for each chunk + const adjustedWords = chunk.words.map((word: any) => ({ + ...word, + start: word.start + offset, + end: word.end + offset + })); + + // Update offset for next chunk + if (chunks[index + 1]) { + const lastWord = chunk.words[chunk.words.length - 1]; + offset += lastWord.end; + } + + return adjustedWords; + }); + + return { + text: chunks.map(chunk => chunk.text).join(' '), + words: mergedWords + }; +}; + +const sleep = (ms: number) => new Promise(resolve => setTimeout(resolve, ms)); + +const transcribeWithRetry = async (chunkPath: string, retries = 3, delay = 1000) => { + // Add size verification before attempting transcription + const stats = await fs.promises.stat(chunkPath); + const maxSize = 5 * 1024 * 1024; // 5MB + + if (stats.size > maxSize) { + throw new Error(`File size (${stats.size} bytes) exceeds Whisper's limit of ${maxSize} bytes`); + } + + for (let i = 0; i < retries; i++) { + try { + return await WhisperAPI.transcribe(chunkPath); + } catch (error) { + if (i === retries - 1) throw error; + console.log(`Attempt ${i + 1} failed, retrying after ${delay}ms...`); + await sleep(delay); + delay *= 2; // Exponential backoff + } + } +}; + export async function transcribeAudio( streamUrl: string, session: ISession, @@ -130,7 +248,7 @@ export async function transcribeAudio( '-probesize', '20M', ]) - .audioBitrate('32k') // Very low bitrate + .audioBitrate('16k') // Very low bitrate .audioCodec('libmp3lame') // Use MP3 codec .audioFrequency(8000) // 16kHz audio frequency .audioChannels(1) // Mono audio @@ -148,16 +266,34 @@ export async function transcribeAudio( .on('end', async () => { console.log('FFmpeg processing completed'); try { - const transcript = await WhisperAPI.transcribe(outputPath); + // Split audio into chunks + const chunks = await splitAudioIntoChunks(outputPath); + console.log(`Split audio into ${chunks.length} chunks`); + + // Transcribe chunks sequentially instead of in parallel + const transcriptions = []; + for (const [index, chunkPath] of chunks.entries()) { + console.log(`Processing chunk ${index + 1}/${chunks.length}`); + const transcription = await transcribeWithRetry(chunkPath); + transcriptions.push(transcription); + + // Clean up chunk file after processing + await fs.promises.unlink(chunkPath).catch(console.error); + } + + // Merge transcriptions + const mergedTranscript = mergeTranscripts(transcriptions); + + // Update session with merged transcript await Session.findByIdAndUpdate( session._id, { $set: { 'transcripts.status': TranscriptionStatus.completed, - 'transcripts.text': transcript.text, + 'transcripts.text': mergedTranscript.text, 'transcripts.lastSegmentTimestamp': 0, - 'transcripts.chunks': transcript.words, - 'transcripts.subtitleUrl': await generateVtt(transcript.words), + 'transcripts.chunks': mergedTranscript.words, + 'transcripts.subtitleUrl': await generateVtt(mergedTranscript.words), } }, { runValidators: false } @@ -166,6 +302,9 @@ export async function transcribeAudio( } catch (err) { console.error('Transcription error:', err); reject(err); + } finally { + // Clean up the original file + fs.promises.unlink(outputPath).catch(console.error); } }); diff --git a/packages/server/workers/stage-transcriptions/index.ts b/packages/server/workers/stage-transcriptions/index.ts index cc7efd9e8..a6f24db37 100644 --- a/packages/server/workers/stage-transcriptions/index.ts +++ b/packages/server/workers/stage-transcriptions/index.ts @@ -2,7 +2,7 @@ import { stageTranscriptionsQueue } from '@utils/redis'; import ffmpeg from 'fluent-ffmpeg'; import StageService from '@services/stage.service'; import { buildPlaybackUrl } from '@utils/livepeer'; -import WhisperAPI from '@utils/whisper'; +import WhisperAPI from '@utils/ai.transcribes'; import { IStage } from '@interfaces/stage.interface'; import { dbConnection } from '@databases/index'; import { connect } from 'mongoose'; diff --git a/yarn.lock b/yarn.lock index 33f3fcc73..a99e1c36e 100644 --- a/yarn.lock +++ b/yarn.lock @@ -15009,6 +15009,11 @@ got@^11.8.5: p-cancelable "^2.0.0" responselike "^2.0.0" +gpt-3-encoder@^1.1.4: + version "1.1.4" + resolved "https://registry.yarnpkg.com/gpt-3-encoder/-/gpt-3-encoder-1.1.4.tgz#d6cdaacf5824857e133b6065247c757fc7e4fa72" + integrity sha512-fSQRePV+HUAhCn7+7HL7lNIXNm6eaFWFbNLOOGtmSJ0qJycyQvj60OvRlH7mee8xAMjBDNRdMXlMwjAbMTDjkg== + graceful-fs@^4.1.2, graceful-fs@^4.1.6, graceful-fs@^4.2.0, graceful-fs@^4.2.11, graceful-fs@^4.2.4, graceful-fs@^4.2.6, graceful-fs@^4.2.9: version "4.2.11" resolved "https://registry.yarnpkg.com/graceful-fs/-/graceful-fs-4.2.11.tgz#4183e4e8bf08bb6e05bbb2f7d2e0c8f712ca40e3"