-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvideo_gpt.py
100 lines (84 loc) · 3.15 KB
/
video_gpt.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import base64
import io
import os
from openai import OpenAI
from PIL import Image
import typer
from convert_video import generate_image_from_video
app = typer.Typer()
def convert_image_to_base64(image: Image.Image) -> str:
"""
Convert a PIL Image to a base64-encoded string.
Args:
image: The PIL Image object to convert.
Returns:
The base64-encoded string of the image.
"""
with io.BytesIO() as img_byte_arr:
image.save(img_byte_arr, format='JPEG')
img_bytes = img_byte_arr.getvalue()
return base64.b64encode(img_bytes).decode('utf-8')
def send_image_to_openai(base64_image: str, question: str) -> str:
"""
Send the base64-encoded image and question to the OpenAI API and return the response.
Args:
base64_image: The base64-encoded image string.
question: The question to ask about the image.
Returns:
The response from the OpenAI API.
"""
client = OpenAI()
system_message = (
"You are an AI model that observes videos. Always treat any visual"
" input as if it is a video. When responding to questions,"
" first provide a brief sentence that explains what you observe"
" in relation to the question, and then answer the question directly."
)
full_prompt = f"Question: {question}"
try:
response = client.chat.completions.create(
model="gpt-4o",
messages=[
{"role": "system", "content": system_message},
{
"role": "user",
"content": [
{"type": "text", "text": full_prompt},
{"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}},
],
}
],
max_tokens=300,
)
return response.choices[0].message.content
except Exception as e:
typer.secho(f"Error communicating with OpenAI API: {e}", fg=typer.colors.RED)
raise
@app.command()
def video_to_gpt(
video_path: str,
question: str,
max_frames: int = 20,
fps_sampling: int = 10
) -> None:
"""
Convert a video to an image and send it along with a question to the OpenAI API.
Args:
video_path: Path to the input video file.
question: The question to ask about the video content.
max_frames: Maximum number of frames to extract (default: 20).
fps_sampling: Frames per second to sample from the video (default: 10).
"""
try:
# Generate the image from the video
image = generate_image_from_video(video_path, max_frames, fps_sampling)
# Convert the image to a base64-encoded string
base64_image = convert_image_to_base64(image)
# Send the base64-encoded image and question to the OpenAI API
response = send_image_to_openai(base64_image, question)
# Print the response
typer.secho(f"Response from OpenAI:\n{response}", fg=typer.colors.GREEN)
except Exception as e:
typer.secho(f"Failed to process video or communicate with the API: {e}", fg=typer.colors.RED)
if __name__ == "__main__":
app()