# github-trending.py
# ----
# Scrape the GitHub Trending page, convert the HTML to Markdown, and ask
# Gemini to extract the listed repositories into a structured JSON object.
# Install dependencies: uv pip install fast-html2md hrequests google-genai python-dotenv
# Create a .env file containing GOOGLE_API_KEY=your_api_key
# Run: uv run python examples/github-trending.py
# ----
import os
import hrequests
from fast_html2md import HTMLToMarkdown
from google import genai
from google.genai import types
from dotenv import load_dotenv
# Populate the process environment from a local .env file, if one exists,
# so GOOGLE_API_KEY does not have to be exported in the shell.
load_dotenv()

# Module-level Gemini client shared by the rest of the script; the key is
# looked up in the environment (None when the variable is unset).
client = genai.Client(api_key=os.environ.get("GOOGLE_API_KEY"))
def _build_response_schema():
    """Return the JSON schema that constrains the model's structured output."""
    contributor = {
        "type": "object",
        "properties": {
            "username": {"type": "string"},
            "avatar_url": {"type": "string"},
            "href": {"type": "string"},
        },
        "required": ["username", "href"],
    }
    repository = {
        "type": "object",
        "properties": {
            "rank": {"type": "integer"},
            "name": {"type": "string"},
            "owner": {"type": "string"},
            "repository_name": {"type": "string"},
            "description": {"type": "string"},
            "language": {"type": "string"},
            "stars": {"type": "integer"},
            "forks": {"type": "integer"},
            "todays_stars": {"type": "number"},
            "built_by": {"type": "array", "items": contributor},
            "href": {"type": "string"},
        },
        "required": [
            "rank",
            "name",
            "owner",
            "repository_name",
            "description",
            "stars",
            "forks",
            "href",
        ],
    }
    return {
        "type": "object",
        "properties": {
            "repositories": {"type": "array", "items": repository},
        },
        "required": ["repositories"],
        "description": "Schema for the JSON output of scraping GitHub Trending page.",
    }


def _fetch_trending_markdown(url):
    """Download *url* and return its HTML body converted to Markdown.

    Raises:
        RuntimeError: If the server answers with an HTTP error status (>= 400).
    """
    page = hrequests.get(url)
    # Fail fast: an error page (rate limit, outage) would otherwise be fed to
    # the model, which would then invent or return empty repository data.
    if page.status_code >= 400:
        raise RuntimeError(
            f"GET {url} failed with HTTP status {page.status_code}"
        )
    return HTMLToMarkdown().convert(page.text)


def main():
    """Scrape GitHub Trending and print the extracted repositories as JSON.

    Fetches https://github.com/trending, converts the page to Markdown, sends
    it to the Gemini model with a JSON response schema, and prints the text of
    the model's reply to stdout.

    Raises:
        RuntimeError: If the trending page cannot be fetched successfully.
    """
    markdown = _fetch_trending_markdown("https://github.com/trending")
    prompt = f"\nExtract the following Markdown into a JSON object:\n{markdown}\n"

    # Requesting application/json together with a schema makes the model emit
    # output that conforms to the structure built above.
    result = client.models.generate_content(
        model="gemini-2.0-flash-exp",
        contents=prompt,
        config=types.GenerateContentConfig(
            response_mime_type="application/json",
            response_schema=_build_response_schema(),
        ),
    )
    print(result.text)
# Script entry point: run the scraper only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()