# main.py
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
import urllib.request
import datetime
import os
from dotenv import load_dotenv
def get_today_date():
    """Return the current calendar date, evaluated in UTC."""
    return datetime.datetime.now(datetime.timezone.utc).date()
def get_yesterday_date(today):
    """Return the calendar date one day before ``today``.

    Uses ``timedelta`` arithmetic so that month and year boundaries roll
    over correctly (1 March -> 28/29 February, 1 January -> 31 December).
    Rewriting the ``day`` field directly would raise ``ValueError`` on the
    first day of every month, because day 0 does not exist.

    Args:
        today: A ``datetime.date`` (or ``datetime.datetime``) value.

    Returns:
        A value of the same type as ``today``, shifted back by one day.
    """
    return today - datetime.timedelta(days=1)
# Current timestamp for log lines, formatted as YYYY-MM-DD HH:MM:SS
def get_now():
    """Return ``datetime.datetime.now()`` rendered as ``YYYY-MM-DD HH:MM:SS``."""
    timestamp_format = "%Y-%m-%d %H:%M:%S"
    current_moment = datetime.datetime.now()
    return current_moment.strftime(timestamp_format)
# Compact date stamp in the form YYYYMMDD
def format_date_string(date_obj):
    """Return ``date_obj`` formatted as an eight-digit ``YYYYMMDD`` string."""
    compact_format = "%Y%m%d"
    return date_obj.strftime(compact_format)
def get_filename(prefix, date_obj):
    """Build the PDF file name ``<prefix><formatted date>.pdf``.

    The date part is produced by ``format_date_string``.
    """
    date_part = format_date_string(date_obj)
    return prefix + date_part + ".pdf"
def set_working_directory():
    """Return the resolved path of the directory containing this script.

    Despite the name, nothing is changed: the process working directory is
    left untouched and the path is only computed and returned.
    """
    script_directory = os.path.dirname(__file__)
    return os.path.realpath(script_directory)
def get_save_path(directory):
    """Return ``<directory>/crosswords/``, creating that folder if missing.

    The returned string keeps its trailing slash; callers in this file
    append the file name to it by plain string concatenation.
    """
    target_directory = directory + '/crosswords/'
    if os.path.exists(target_directory):
        return target_directory
    # The folder is not there yet: log the fact, then create it.
    print(get_now() + " - Creating directory: ", target_directory)
    os.makedirs(target_directory)
    return target_directory
def delete_file(file_path):
    """Remove ``file_path`` when it is an existing regular file; otherwise do nothing."""
    if not os.path.isfile(file_path):
        return
    os.remove(file_path)
def check_file_exists(file_path):
    """Return ``True`` when ``file_path`` is an existing regular file, else ``False``."""
    is_regular_file = os.path.isfile(file_path)
    return is_regular_file
def download_file(url, save_path, file_name):
    """Download ``url + file_name`` and store it at ``save_path + file_name``.

    The data is first written to a temporary ``.part`` file next to the
    destination and only moved into place once the transfer has finished.
    An interrupted or truncated download therefore never leaves a file at
    the final path, where a later run would mistake it for a complete one.

    Args:
        url: Base URL, including the trailing slash.
        save_path: Destination directory, including the trailing slash.
        file_name: Name of the file, appended to both ``url`` and
            ``save_path``.

    Raises:
        SystemExit: With exit status 1 when the download fails. The bare
            ``exit()`` used previously reported status 0 (success) and
            relied on a builtin that the ``site`` module injects.
    """
    destination = save_path + file_name
    partial = destination + ".part"
    try:
        urllib.request.urlretrieve(url + file_name, partial)
        # Same-directory rename: replaces the destination in one step.
        os.replace(partial, destination)
    except Exception as e:
        # Top-level boundary for this script: any failure aborts the run.
        if os.path.isfile(partial):
            os.remove(partial)
        print(get_now() + " - Error downloading file:", e)
        raise SystemExit(1)
def authenticate():
    """Return a ``GoogleAuth`` object, reusing credentials saved beside the script.

    Credentials are loaded from ``credentials.txt`` in the script directory.
    Depending on their state the local-webserver flow is run, the token is
    refreshed, or the stored credentials are authorised as they are. The
    credentials are then written back to the same file.
    """
    gauth = GoogleAuth()
    credentials_file = set_working_directory() + "/credentials.txt"
    gauth.LoadCredentialsFile(credentials_file)

    if gauth.credentials is not None:
        if gauth.access_token_expired:
            gauth.Refresh()
        else:
            gauth.Authorize()
    else:
        # Nothing was loaded from the credentials file.
        gauth.LocalWebserverAuth()

    gauth.SaveCredentialsFile(credentials_file)
    return gauth
def get_drive_instance(gauth):
    """Wrap the given auth object in a ``GoogleDrive`` client and return it."""
    drive_client = GoogleDrive(gauth)
    return drive_client
def check_file_exists_in_drive(drive, parent_id, file_name):
    """Return the ID of the first non-trashed file titled ``file_name``.

    Only files whose parent is ``parent_id`` are listed. Returns ``None``
    when no listed file has a matching title.
    """
    query = {'q': "'" + parent_id + "' in parents and trashed=false"}
    listing = drive.ListFile(query).GetList()
    matching_ids = (entry['id'] for entry in listing if entry['title'] == file_name)
    return next(matching_ids, None)
def upload_file_to_drive(drive, parent_id, file_name, save_path):
    """Upload ``save_path + file_name`` to the Drive folder ``parent_id``.

    The remote file gets ``file_name`` as its title. A timestamped
    confirmation line is printed after the upload call returns.
    """
    metadata = {'title': file_name, 'parents': [{'id': parent_id}]}
    remote_file = drive.CreateFile(metadata)
    local_file = save_path + file_name
    remote_file.SetContentFile(local_file)
    remote_file.Upload()
    print(get_now() + " - File uploaded to GDrive: ", file_name)
def get_prefix(day):
    """Return the file-name prefix of the crossword published on ``day``.

    Sundays use the Observer speedy crossword; every other day uses the
    Guardian quick crossword.
    """
    sunday = 6  # date.weekday() counts Monday as 0 through Sunday as 6
    if day.weekday() == sunday:
        return "obs.speedy."
    return "gdn.quick."
def clean_files_on_date(current_working_directory, date_to_remove):
    """Delete the local crossword PDF for ``date_to_remove``, if present.

    Returns ``True`` when a file was found and deleted, ``False`` when
    there was no such file.
    """
    stale_name = get_filename(get_prefix(date_to_remove), date_to_remove)
    stale_path = get_save_path(current_working_directory) + stale_name
    if not check_file_exists(stale_path):
        return False
    delete_file(stale_path)
    return True
def main():
    """Fetch today's crossword PDF, mirror it to Google Drive, and tidy up.

    Steps:
      1. Load ``.env`` and read the target Drive folder ID from
         ``GDRIVE_DIRECTORY``.
      2. Download today's PDF unless it is already stored locally.
      3. Upload it to the Drive folder unless a file with the same title
         is already there.
      4. Delete yesterday's local PDF, if any.

    Raises:
        SystemExit: When ``GDRIVE_DIRECTORY`` is unset or empty. Checking
            this up front avoids downloading and authenticating only to
            fail later with a ``TypeError`` while building the Drive query.
    """
    load_dotenv()

    # Get parent directory from .env file
    parent_id = os.getenv('GDRIVE_DIRECTORY')
    if not parent_id:
        raise SystemExit(get_now() + " - GDRIVE_DIRECTORY is not set; cannot locate the GDrive folder")

    # Set up variables
    current_working_directory = set_working_directory()
    save_path = get_save_path(current_working_directory)
    today = get_today_date()
    today_filename = get_filename(get_prefix(today), today)

    # Check if file exists locally; each log line takes a fresh timestamp
    # so that it reflects when the step actually ran.
    if check_file_exists(save_path + today_filename):
        print(get_now() + " - File already exists locally")
    else:  # Download file
        print(get_now() + " - File does not exist locally, downloading...")
        download_host = "https://crosswords-static.guim.co.uk/"
        download_file(download_host, save_path, today_filename)

    # Check if file exists in GDrive
    gauth = authenticate()
    drive = get_drive_instance(gauth)
    file_id = check_file_exists_in_drive(drive, parent_id, today_filename)
    if file_id is None:  # Upload file to GDrive
        upload_file_to_drive(drive, parent_id, today_filename, save_path)
    else:
        print(get_now() + " - File already exists in GDrive: ", today_filename)

    # Clean up files
    yesterday = get_yesterday_date(today)
    if clean_files_on_date(current_working_directory, yesterday):
        print(get_now() + " - Deleted file: ", get_filename(get_prefix(yesterday), yesterday))
# Run the workflow only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()