-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathfirstpost.py
129 lines (112 loc) · 3.72 KB
/
firstpost.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import datetime
import json
import sys
import time
import ddb
import configuration
import utils
class FirstPost(object):
    """
    Track the earliest known post of each Slack user.

    Rows are keyed by ``slack_uid`` and read from / written to the table
    returned by the project's ``ddb.DDB`` wrapper.  Rows fetched during a run
    are cached in ``self.users``; ``save()`` writes the cache back.
    """

    table_name = "FirstPost"

    def __init__(self, fake=False):
        """
        Open the backing table and start with an empty cache.

        fake -- stored on the instance; no method of this class reads it.
        """
        self.ddb = ddb.DDB(self.table_name, [('slack_uid', 'S')])
        self.table = self.ddb.get_table()
        # Cache of rows keyed by slack_uid (filled by get() and message()).
        self.users = {}
        # Not referenced by any method of this class.
        self.modified = {}
        # Messages processed for the current channel; None until
        # set_channel() is called, so message() requires set_channel() first.
        self.count = None
        self.channel = None
        self.fake = fake

    def date(self, ts):
        """
        return yyyy-mm-dd (local time) for ts
        """
        localtime = time.localtime(ts)
        return time.strftime("%Y-%m-%d", localtime)

    def days(self, ts):
        """
        return date difference (in whole days) between today and the ts
        """
        then = datetime.datetime.fromtimestamp(ts).date()
        now = datetime.date.today()
        diff = now - then
        return diff.days

    def _add_derived_fields(self, item):
        """
        (Re)compute the convenience fields 'url', 'days' and 'date' of item
        from its 'slack_cid', 'message_id' and 'ts'.  Mutates and returns item.
        """
        item['url'] = utils.make_url(item['slack_cid'], item['message_id'])
        item['days'] = self.days(item['ts'])
        item['date'] = self.date(item['ts'])
        return item

    def get(self, key):
        """
        Return the row for key, or whatever the table lookup yields when the
        key is unknown (the "Item" entry of the response, None if absent).

        Cached rows are returned as-is.  Rows fetched from the table get 'ts'
        converted to int and the derived 'url', 'days' and 'date' fields
        added before being cached.  Misses are not cached.
        """
        if key in self.users:
            return self.users[key]
        response = self.table.get_item(Key={'slack_uid': key})
        item = response.get("Item")
        if item:
            item['ts'] = int(item['ts'])
            self._add_derived_fields(item)
            self.users[key] = item
        return item

    def set_channel(self, cid):
        """
        Select the channel subsequent message() calls belong to and reset the
        per-channel message counter.
        """
        self.count = 0
        self.channel = cid

    def print_progress(self, ts):
        """
        Emit a progress marker on stdout depending on self.count:
        the yyyy-mm-dd date of ts every 10000 messages, the running count
        every 2000, a dot every 200, nothing otherwise.
        """
        s = None
        if self.count % 10000 == 0:
            f = time.localtime(ts)
            s = time.strftime("%Y-%m-%d", f)
        elif self.count % 2000 == 0:
            s = str(self.count)
        elif self.count % 200 == 0:
            s = "."
        if s:
            sys.stdout.write(s)
            sys.stdout.flush()

    def message(self, message):
        """
        Process one message dict from the current channel.

        Messages without a 'user' and messages whose 'subtype' is
        "channel_join" are ignored (returns None).  Otherwise the user's
        cached row is created, or replaced when this message is earlier than
        the one recorded, and the message is returned.

        set_channel() must have been called beforehand.
        """
        uid = message.get('user')
        if not uid:
            return
        if message.get("subtype") == "channel_join":
            return
        mid = message['ts']
        # The message id is a string such as "1234567890.000200"; only the
        # whole-second part is used for comparisons.
        ts = int(float(mid))
        entry = self.get(uid)
        self.count += 1
        self.print_progress(ts)
        if entry:
            # We have an existing entry. Was it later than this one?
            if entry['ts'] > ts:
                # The existing entry is later than this one.
                entry['ts'] = ts
                entry['slack_cid'] = self.channel
                entry['message_id'] = str(mid)
                # Rows loaded through get() carry url/days/date computed
                # from the post we just replaced; refresh them so they do
                # not keep pointing at the old message.  Rows created in
                # this session never had them and are left untouched.
                if 'url' in entry:
                    self._add_derived_fields(entry)
        else:
            # No existing entry -- this is the first one
            self.users[uid] = {
                "slack_uid": uid,
                "slack_cid": self.channel,
                "message_id": str(mid),
                "ts": ts
            }
        return message

    def get_channel(self, cid):
        """
        Look up cid through get(); returns whatever get(cid) returns.
        """
        return self.get(cid)

    def firstpost_count(self, start_date, days):
        """
        based on a yyyy-mm-dd start_date and an int days, report how many
        people had their first post in this period

        The period is the half-open interval [start_date 00:00,
        start_date + days 00:00) in local time, so consecutive periods never
        count the same post twice.
        """
        (y, m, d) = [int(x) for x in start_date.split("-")]
        start = datetime.datetime(y, m, d, 0, 0, 0)
        report_start_ts = start.timestamp()
        # Advance by calendar days rather than 86400-second blocks so the
        # end stays on local midnight across DST changes.
        report_end_ts = (start + datetime.timedelta(days=days)).timestamp()
        return sum(
            1
            for item in self.ddb.items(self.ddb.get_table())
            if report_start_ts <= item['ts'] < report_end_ts
        )

    def save(self):
        """
        Write every non-empty cached row back to the table in one batch.
        """
        with self.table.batch_writer() as batch:
            for uid in self.users:
                row = self.users[uid]
                if not row:
                    continue
                batch.put_item(row)