-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathoutreach.py
132 lines (105 loc) · 5.07 KB
/
outreach.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
"""Starting point of the application."""
import argparse
import asyncio
import os
from datetime import datetime, timedelta, timezone

from dotenv import load_dotenv

from cache_utils import get_video_hash, cache_result
from csv_utils import save_posts_to_csv
from keyword_extractor import get_relevant_keywords
from post_analysis import analyze_posts, generate_engagement_content
from reddit_search import get_reddit_instance, search_posts, RedditPost
from video_utils import extract_video_details
# Constants
COMMENT_THRESHOLD = 10  # keep only posts with at most this many comments
TIME_THRESHOLD_IN_MONTHS = 3  # in months; keep only posts newer than this
SECTION_SEPARATOR = "=" * 20  # visual divider between major output sections
POST_SEPARATOR = "-" * 20  # visual divider between individual posts
# Load environment variables from .env file at the start of the script
# NOTE(review): presumably supplies Reddit/API credentials read later by
# get_reddit_instance and the analysis helpers — confirm against those modules.
load_dotenv()
def filter_posts(posts: list[RedditPost]) -> list[RedditPost]:
    """
    Filter posts based on comment count and age.

    Keeps only posts with at most COMMENT_THRESHOLD comments that were
    created within the last TIME_THRESHOLD_IN_MONTHS months (a month is
    approximated as 30 days).
    :param posts: list of Reddit submissions to filter.
    :return: list of filtered Reddit submissions.
    """
    # Use timezone-aware datetimes throughout: datetime.utcnow() and
    # datetime.utcfromtimestamp() are deprecated since Python 3.12, and
    # naive timestamps invite aware/naive comparison bugs.
    now = datetime.now(timezone.utc)
    threshold_date = now - timedelta(days=TIME_THRESHOLD_IN_MONTHS * 30)
    return [
        post for post in posts
        if post.num_comments <= COMMENT_THRESHOLD
        and datetime.fromtimestamp(post.created_utc, tz=timezone.utc) > threshold_date
    ]
@cache_result("video_details")
async def get_video_details(video_url: str, video_hash: str) -> tuple:
    """Extract video details and save to cache if not already cached.

    :param video_url: URL of the YouTube video.
    :param video_hash: hash of the video; unused in the body — presumably
        consumed by the cache_result decorator as part of the cache key.
    :return: (title, description) tuple.
    """
    video_title, video_description = extract_video_details(video_url)
    return video_title, video_description
@cache_result("keywords")
async def get_keywords(video_title: str, video_description: str, video_hash: str) -> list:
    """Get relevant keywords and save to cache if not already cached.

    :param video_title: title of the video.
    :param video_description: description of the video.
    :param video_hash: hash of the video; presumably used by cache_result
        as part of the cache key.
    :return: list of extracted keywords.
    """
    keywords = await get_relevant_keywords(video_title, video_description)
    return keywords
@cache_result("filtered_posts")
async def get_reddit_posts(reddit, keywords: list, video_hash: str) -> list[RedditPost]:
    """Search for Reddit posts and save to cache if not already cached.

    Runs the keyword search and then applies the comment-count/age filter.
    :param reddit: initialized Reddit client instance.
    :param keywords: search keywords to query with.
    :param video_hash: hash of the video; presumably used by cache_result
        as part of the cache key.
    :return: filtered list of matching posts.
    """
    matches = await search_posts(reddit, keywords)
    return filter_posts(matches)
@cache_result("relevant_posts")
async def analyze_reddit_posts(posts: list[RedditPost], video_title: str, video_description: str, video_hash: str) -> list:
    """Analyze Reddit posts for relevance and save to cache if not already cached.

    :param posts: candidate posts to score for relevance.
    :param video_title: title of the video.
    :param video_description: description of the video.
    :param video_hash: hash of the video; presumably used by cache_result
        as part of the cache key.
    :return: list of posts judged relevant to the video.
    """
    relevant = await analyze_posts(posts, video_title, video_description)
    return relevant
async def main(video_url: str) -> None:
    """
    Main function to extract video details and initialize Reddit.

    Pipeline: hash the URL -> fetch video details -> extract keywords ->
    search Reddit -> filter/analyze posts -> generate comments -> save CSV.
    Each cached step keys on the video hash, so re-runs of the same URL
    reuse earlier results. Prints progress to stdout and returns early
    (after printing an error) when any stage yields nothing.
    :param video_url: URL of the YouTube video.
    """
    # Generate video hash — the cache key shared by all cached steps below.
    video_hash = get_video_hash(video_url)
    # Extract video details
    video_title, video_description = await get_video_details(video_url=video_url, video_hash=video_hash)
    print(f"\n{SECTION_SEPARATOR}\n📹 Video Title: {video_title}\n{SECTION_SEPARATOR}")
    print(f"\n📄 Video Description:\n{video_description}\n{SECTION_SEPARATOR}")
    # Get relevant keywords
    keywords = await get_keywords(video_title=video_title, video_description=video_description, video_hash=video_hash)
    if not keywords:
        print("Error: Unable to extract keywords.")
        return
    print(f"🔑 Suggested Keywords:\n{' - '.join(keywords)}\n{SECTION_SEPARATOR}")
    # Initialize Reddit
    # reddit starts as None so the finally block can distinguish "never
    # initialized" from "needs closing".
    reddit = None
    try:
        reddit = await get_reddit_instance()
        print(f"🚀 Reddit initialized successfully.\n{SECTION_SEPARATOR}")
        # Search for posts based on keywords
        posts = await get_reddit_posts(reddit=reddit, keywords=keywords, video_hash=video_hash)
        if not posts:
            print("Error: Unable to find matching posts.")
            return
        print(f"🔍 Found {len(posts)} posts matching the criteria. Analyzing relevance...\n{SECTION_SEPARATOR}")
        # Analyze posts for relevance
        relevant_posts = await analyze_reddit_posts(posts=posts, video_title=video_title, video_description=video_description, video_hash=video_hash)
        if not relevant_posts:
            print("Error: No relevant posts found.")
            return
        print(f"✔️ Found {len(relevant_posts)} relevant posts. Generating comments...\n{SECTION_SEPARATOR}")
        # Generate engagement content
        # zip pairs each relevant post with its generated comment; if the
        # helper returns fewer comments than posts, extras are silently
        # dropped — NOTE(review): confirm lengths always match.
        comments = await generate_engagement_content(video_url, video_title, relevant_posts)
        for post, comment in zip(relevant_posts, comments):
            print(f"📝 Post Title: {post.title}")
            print(f"💬 Generated Comment: {comment}")
            print(f"🔗 Post URL: {post.url}\n{POST_SEPARATOR}")
        # Save to CSV
        csv_path = save_posts_to_csv(relevant_posts, comments, f"{video_hash}_relevant_posts.csv")
        print(f"📁 Relevant posts and comments have been saved to {csv_path}")
    except RuntimeError as e:
        print(f"Error initializing Reddit: {e}")
    finally:
        # Always release the Reddit client session, even on early return.
        if reddit is not None:
            await reddit.close()
if __name__ == "__main__":
    # CLI entry point: one required positional argument (the video URL),
    # then run the async pipeline to completion.
    parser = argparse.ArgumentParser(
        description="Extract video details and initialize Reddit.")
    parser.add_argument("video_url", type=str, help="URL of the YouTube video")
    args = parser.parse_args()
    asyncio.run(main(args.video_url))