This repository has been archived by the owner on Jul 11, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 8
/
livesdataexporter.py
167 lines (142 loc) · 5.39 KB
/
livesdataexporter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
import json
import os
from datetime import datetime, timedelta
from dateutil import parser
import csv
import zipfile
import pymongo
class LivesDataExporter:
    """Export restaurant-inspection data for one locality in the LIVES format.

    Reads vendor documents from a MongoDB collection, writes businesses.csv
    and inspections.csv into a zip archive under ``data_dir``, and tracks
    export status (availability, write lock, last-written time) in a JSON
    metadata file alongside the archive.
    """

    # Locale-independent timestamp format for the "last_written" metadata
    # field. The original used strftime("%c"), which depends on the active
    # locale and cannot be re-parsed reliably; legacy "%c" values simply
    # fail to parse in is_stale and are treated as stale.
    _TIME_FORMAT = "%Y-%m-%dT%H:%M:%S"

    def __init__(self, collection, locality, data_dir=os.path.join(os.path.dirname(__file__), "livesData")):
        """
        :param collection: pymongo.collection.Collection of vendor documents
        :param locality: str locality name (title-cased internally)
        :param data_dir: str directory for exported files
        """
        self.collection = collection
        self.locality = locality.title()
        self.locality_file_name = self.locality.replace(" ", "_")
        # Backing attribute for the read-only data_dir property; assigning
        # to self.data_dir directly would collide with the property on
        # new-style classes (no setter -> AttributeError).
        self._data_dir = data_dir
        self.archive_file = os.path.join(data_dir, self.locality_file_name + ".zip")
        self.tmp_dir = os.path.join(data_dir, "tmp")
        # Create each directory independently: the original only created
        # tmp_dir when data_dir itself was missing, so an existing data_dir
        # without a tmp/ subdirectory broke write_file().
        if not os.path.exists(data_dir):
            os.makedirs(data_dir)
        if not os.path.exists(self.tmp_dir):
            os.makedirs(self.tmp_dir)
        self.metadata_file = os.path.join(data_dir, self.locality_file_name + ".json")
        self.metadata = None
        self.__load_metadata()

    @property
    def data_dir(self):
        """Path to the data export directory.

        :rtype : str
        """
        # Returning self.data_dir here (as the original did) recurses
        # infinitely on new-style classes; use the backing attribute.
        return self._data_dir

    def __load_metadata(self):
        """Load export-status metadata from the filesystem, or initialize
        fresh defaults when the file is missing or unreadable.

        :rtype : None
        """
        try:
            with open(self.metadata_file, "r") as metadata_file:
                self.metadata = json.load(metadata_file)
        except (IOError, ValueError):
            # IOError: file does not exist yet; ValueError: corrupt or
            # partially-written JSON (json.JSONDecodeError subclasses it).
            # Either way, start from a clean "not available" state.
            self.metadata = dict(path="lives-file/" + self.locality_file_name + ".zip",
                                 available=False,
                                 locality=self.locality)
        return None

    @property
    def has_results(self):
        """Tell us if the provided locality has any data to export.

        :rtype : bool
        """
        return self.collection.find_one({"locality": self.locality}) is not None

    @property
    def available_localities(self):
        """Get a list of available localities.

        :rtype : list
        """
        # distinct() already yields the values; no manual append loop needed.
        return list(self.collection.distinct("locality"))

    @property
    def is_stale(self):
        """Tell us if the exported data is old enough to be considered
        stale (currently: more than 1 day old, or never written).

        :rtype : bool
        """
        try:
            last_written = datetime.strptime(self.metadata["last_written"],
                                             self._TIME_FORMAT)
        except (KeyError, ValueError):
            # Never written, or an unparseable (e.g. legacy "%c") timestamp:
            # treat as stale so the export gets regenerated.
            return True
        return datetime.utcnow() - last_written > timedelta(days=1)

    @property
    def is_writing(self):
        """Tell us if the export for this locality is currently writing.

        :rtype : bool
        """
        return self.metadata.get("status") == "writing"

    def set_write_lock(self):
        """Set export status to 'writing' and persist it immediately.

        :rtype : None
        """
        self.metadata["status"] = "writing"
        self.save_metadata()
        return None

    def save_metadata(self):
        """Persist metadata about the file export.

        :rtype : None
        """
        with open(self.metadata_file, "w") as metadata_file:
            json.dump(self.metadata, metadata_file)
        return None

    def load_results(self):
        """Load results for this locality.

        :rtype : pymongo.cursor
        """
        return self.collection.find({"locality": self.locality})

    def write_file(self):
        """Write the exported data to the filesystem.

        Writes businesses.csv and inspections.csv into the tmp directory,
        zips them into the archive, removes the tmp files, then marks the
        export complete in the metadata.

        :rtype : None
        """
        businesses_path_tmp = os.path.join(self.tmp_dir, self.locality_file_name + "_businesses.csv")
        inspections_path_tmp = os.path.join(self.tmp_dir, self.locality_file_name + "_inspections.csv")
        try:
            with open(businesses_path_tmp, "w") as businesses_csv, \
                    open(inspections_path_tmp, "w") as inspections_csv:
                b_writer = csv.writer(businesses_csv)
                i_writer = csv.writer(inspections_csv)
                for vendor in self.load_results():
                    b_writer.writerow([vendor["_id"],
                                       vendor["name"],
                                       vendor["address"]
                                       ])
                    for inspection in vendor["inspections"]:
                        # LIVES expects YYYYMMDD inspection dates.
                        i_writer.writerow([vendor["_id"],
                                           inspection["date"].strftime("%Y%m%d")])
            with zipfile.ZipFile(self.archive_file, "w") as zip_file:
                zip_file.write(businesses_path_tmp, os.path.join(self.locality_file_name, "businesses.csv"))
                zip_file.write(inspections_path_tmp, os.path.join(self.locality_file_name, "inspections.csv"))
        finally:
            # Always delete tmp files, even when CSV writing or zipping
            # fails midway; the original leaked them on any error.
            for tmp_path in (businesses_path_tmp, inspections_path_tmp):
                if os.path.exists(tmp_path):
                    os.remove(tmp_path)
        # update metadata
        self.metadata["status"] = "complete"
        self.metadata["last_written"] = datetime.utcnow().strftime(self._TIME_FORMAT)
        self.metadata["available"] = True
        self.save_metadata()
        print("Done writing " + self.locality_file_name + ".zip")
        return None

    @staticmethod
    def format_locality_string(locality):
        """Debug helper: print the raw locality string.

        :param locality: str
        :rtype : None
        """
        print(locality)