-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcngal_calendar.py
executable file
·266 lines (230 loc) · 8.32 KB
/
cngal_calendar.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Documentation
# CnGal: https://api.cngal.org/swagger/index.html
# ICS: https://icspy.readthedocs.io/en/stable/api.html#event
# Dateparser: https://dateparser.readthedocs.io/en/latest/settings.html#handling-incomplete-dates
import csv
import json
import os
import re
import sys
from datetime import datetime, timedelta
import dateparser
import requests
from ics import Calendar, Event
# Output files
_OUTPUT_FOLDER = "output/"
_CSV_FILE = "cngal-release.txt"
_JSON_FILE = "cngal-release.json"
_ICS_FILE = "cngal-calendar.ics"
# Date format
# Mid year
_MID_YEAR = "-09-15"
_YEAR_ONLY_REGEX = "^(\\d{4})$"
_YYYYMM_ONLY_REGEX = "^(\\d{4}-\\d{2})$"
_TO_REPLACE = (
# Regular time string
# Time span first
("q1-q2", "4月"),
("q2-q3", "7月"),
("q3-q4", "10月"),
# Keyword second
("spring|春(季|节)?", "3月"),
("summer|夏(季)?", "6月"),
("fall|秋(季)?", "9月"),
("winter|冬(季)?", "12月"),
("q1(季度)?", "2月"),
("q2(季度)?", "5月"),
("q3(季度)?", "8月"),
("q4(季度)?", "11月"),
# Alias third
("第一季度", "2月"),
("第二季度", "5月"),
("第三季度", "8月"),
("第四季度", "11月"),
("年初", "年1月"),
("年[底内末]", "年12月"),
("上旬", "10日"),
("中旬", "20日"),
("下旬", "28日"),
("月[底内末]", "月"),
# Annotation last
("号", "日"),
# ("年", "."),
# ("月", "."),
# ("日", "."),
# Stop words
("即将", ""),
("预[计定期]", ""),
("发[售布].*?$", ""),
("推出.*?$", ""),
("宣布.*?$", ""),
("[Ww]ishlist.*?$", ""),
("TB[AD]|tb[ad]", ""),
(" ", ""),
)
# Convert time string like `2024年8月` to partial/full YYYY-MM-DD
_TO_REPLACE_ISO = (
(r"(\d+)年(\d{1,2})月(\d{1,2})日", r"\1-\2-\3"), # 年月日
(r"(\d+)年(\d{1,2})月", r"\1-\2"), # 年月
(r"(\d+)年", r"\1"), # 年
)
# Exclude outdated ID, not meant to be misused as personal blocklist
_INDEX_FILTER = [2962, 5584]
# CLI testing one-liner:
# curl -X 'GET' 'https://api.cngal.org/api/home/ListUpcomingGames' -H 'accept: application/json'
def get_list():
    """Fetch the list of upcoming games from the CnGal API.

    Returns:
        The decoded JSON body of ``/api/home/ListUpcomingGames``.

    The process is terminated through ``sys.exit`` with a non-zero status
    when the server does not answer with HTTP 200, or when the body is empty
    or not valid JSON, so that a failed fetch is not reported as success.
    Network-level errors raised by ``requests.get`` propagate unchanged.
    """
    api_url = "https://api.cngal.org"
    api_upcoming = "/api/home/ListUpcomingGames"
    headers = {"Content-Type": "application/json; charset=utf-8"}
    url = api_url + api_upcoming
    response = requests.get(url, headers=headers, timeout=30)

    if response.status_code != 200:
        print("Request failed")
        print(response)
        sys.exit(1)

    print("Request successful")
    # response.text is always a str (never None), so test for emptiness
    if not response.text.strip():
        print("Empty response")
        sys.exit(1)

    try:
        return response.json()
    except ValueError:
        # The JSON decode errors raised by requests derive from ValueError
        print("Response is not valid JSON")
        sys.exit(1)
# Process & Write results to JSON & CSV
def process_json(results):
    """Normalize upcoming-game entries and write them to the JSON and CSV files.

    Args:
        results: Iterable of dicts as returned by ``get_list()``. Each entry
            is read for its ``"url"``, ``"name"``, ``"publishTime"`` and
            ``"briefIntroduction"`` keys.

    Returns:
        A list of dicts with the keys ``"url"``, ``"index"``, ``"title"``,
        ``"released"`` (the release date after the ``_TO_REPLACE`` and
        ``_TO_REPLACE_ISO`` substitutions and zero-padding), ``"raw_date"``
        (the unmodified publish time) and ``"intro"``. Entries whose index is
        in ``_INDEX_FILTER``, whose URL carries no index, or whose release
        string ends up empty are left out.

    Side effects:
        Writes the returned list to ``_OUTPUT_FOLDER + _JSON_FILE`` and a
        semicolon-delimited index/title/raw_date table to
        ``_OUTPUT_FOLDER + _CSV_FILE``.
    """
    base_url = "https://www.cngal.org/"
    processed_results = []
    for result in results:
        # Extract index from url
        url = base_url + result["url"]
        index_match = re.search(r"index/(\d+)", url)
        if index_match is None:
            # No index means no usable identifier for this entry; skip it
            # instead of crashing on `.group()` of None
            print(f"Skipping entry without index: {url}", file=sys.stderr)
            continue
        index = index_match.group(1)
        # Skip deprecated index
        if int(index) in _INDEX_FILTER:
            continue

        # A null publish time is treated as "no date" rather than crashing
        raw_date = result["publishTime"] or ""

        # Make ambiguous release date like `预计2024年发售` and `2022年底` machine-readable.
        # Lower-casing once is enough: no replacement introduces upper case.
        released = raw_date.lower()
        for pattern, replacement in _TO_REPLACE:
            released = re.sub(pattern, replacement, released)
        for pattern, replacement in _TO_REPLACE_ISO:
            released = re.sub(pattern, replacement, released)

        # Format date string to ISO date: zero-pad month (and day) parts
        date_parts = released.split("-")
        if len(date_parts) in (2, 3):
            padded = [date_parts[0]] + [f"{part:0>2}" for part in date_parts[1:]]
            released = "-".join(padded)

        # Ignore release without valid date
        if not released:
            continue

        # Build new json
        processed_results.append(
            {
                "url": url,
                "index": index,
                "title": result["name"],
                "released": released,
                "raw_date": raw_date,
                "intro": result["briefIntroduction"],
            }
        )

    # Save processed results to JSON file
    with open(_OUTPUT_FOLDER + _JSON_FILE, "w", encoding="utf-8") as file:
        json.dump(processed_results, file, ensure_ascii=False, indent=2)

    # Save compact results to CSV file
    with open(
        _OUTPUT_FOLDER + _CSV_FILE, mode="w", newline="", encoding="utf-8"
    ) as csv_file:
        fields_to_save = ["index", "title", "raw_date"]
        # Use semicolon separator to avoid clashes with commas in titles
        writer = csv.DictWriter(csv_file, fieldnames=fields_to_save, delimiter=";")
        writer.writeheader()
        for result in processed_results:
            # Compact fields
            writer.writerow({field: result[field] for field in fields_to_save})

    return processed_results
# Function borrowed from SteamWishlistCalendar
# https://github.com/icue/SteamWishlistCalendar/blob/166cc44fec28b01771ac39def0a340940d2a5bf3/swc.py#L34-L52
def last_day_of_next_month(dt):
    """
    Compute the final day of the month that follows ``dt``'s month.

    Args:
        dt: A datetime.datetime object; only its year and month are read.
    Returns:
        A datetime.datetime object at midnight of that day.
    """
    # Zero-based position of the month two steps ahead, counted from January
    # of dt's year; divmod splits it into a year carry and a month index.
    year_carry, month_index = divmod(dt.month + 1, 12)
    first_of_month_after_next = datetime(dt.year + year_carry, month_index + 1, 1)
    # Stepping back one day from there lands on the last day of next month.
    return first_of_month_after_next - timedelta(days=1)
# Make calendar
def make_calendar(processed_results):
    """Write an ICS calendar with one all-day event per processed release.

    Args:
        processed_results: List of dicts as returned by ``process_json``;
            each is read for ``"url"``, ``"intro"``, ``"index"``,
            ``"title"``, ``"released"`` and ``"raw_date"``.

    Incomplete dates are estimated and the event description says so:
      * year only (``2026``): ``_MID_YEAR`` of that year, or Dec 31 once
        that day is no longer in the future;
      * year and month (``2024-03``): the last day of that month, moved
        forward month by month while it lies in the past.

    Entries whose date still cannot be parsed are skipped with a warning on
    stderr so that one malformed entry does not abort the whole calendar.

    Side effects:
        Writes the serialized calendar to ``_OUTPUT_FOLDER + _ICS_FILE``.
    """
    cal = Calendar(creator="CnGalCalendar")
    now = datetime.now()
    today = now.date()

    for result in processed_results:
        description_suffix = ""
        # A null intro must not break the string concatenation
        description = result["url"] + "\n" + (result["intro"] or "")
        index = result["index"]
        title = result["title"]
        release_date = result["released"]

        # Parse date to better fit into reality
        try:
            # Match release date like `2026`
            year_only_match = re.match(_YEAR_ONLY_REGEX, release_date)
            if year_only_match:
                year = year_only_match.group(1)
                # If Sep 15 of this year passed, use the end of year
                mid_release_date = datetime.strptime(
                    year + _MID_YEAR, "%Y-%m-%d"
                ).date()
                release_date = year + (
                    _MID_YEAR if mid_release_date > today else "-12-31"
                )
                description_suffix = f'\n发售日估算自 "{result["raw_date"]}"'

            # Complete remaining release date like `2024-03`
            yyyymm_only_match = re.match(_YYYYMM_ONLY_REGEX, release_date)
            if yyyymm_only_match:
                parsed_date = dateparser.parse(
                    release_date,
                    settings={
                        # Set time zone to UTC+8
                        "TIMEZONE": "Asia/Shanghai",
                        "PREFER_DAY_OF_MONTH": "last",
                        "PREFER_DATES_FROM": "future",
                    },
                )
                if parsed_date is None:
                    # dateparser signals failure with None, not an exception
                    raise ValueError("dateparser could not parse the date")
                release_date = parsed_date
                while release_date.date() < today:
                    # If the estimated release date has already passed,
                    # pick the earliest upcoming last-of-a-month date
                    release_date = last_day_of_next_month(release_date)
                # Only show estimation message in above cases
                description_suffix = f'\n发售日估算自 "{result["raw_date"]}"'

            # Ensure release_date is a datetime object
            if isinstance(release_date, str):
                release_date = datetime.strptime(release_date, "%Y-%m-%d")
        except ValueError as err:
            print(
                f"Skipping {title!r}: unusable release date "
                f"{result['released']!r} ({err})",
                file=sys.stderr,
            )
            continue

        # TODO: include more info
        event = Event(
            uid=index,
            summary=title,
            description=description + description_suffix,
            begin=release_date,
            last_modified=now,
            dtstamp=now,
            categories=["cngal"],
        )
        event.make_all_day()
        cal.events.append(event)

    with open(_OUTPUT_FOLDER + _ICS_FILE, "w", encoding="utf-8") as f:
        f.write(cal.serialize())
# Script body: these statements run whenever the module is executed or imported.
# The output folder must exist before process_json/make_calendar open files in it.
os.makedirs(_OUTPUT_FOLDER, exist_ok=True)
# Download the upcoming-games list from the CnGal API
j = get_list()
# Normalize the release dates and write the JSON and CSV files
events = process_json(j)
# Write the ICS calendar from the processed entries
make_calendar(events)