-
Notifications
You must be signed in to change notification settings - Fork 40
/
Copy pathdata.py
108 lines (93 loc) · 2.75 KB
/
data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
"""
This file loads the data from the data directory and shows you how.
Feel free to change the contents of this file!
Do ensure these functions remain functional:
- get_business(city, business_id)
- get_reviews(city, business_id=None, user_id=None, n=10)
- get_user(username)
"""
import os
import json
import random
# Root directory holding the data files; load() reads
# <DATA_DIR>/<city>/<data_filename>.json beneath it. The path is relative,
# so it is resolved against the process's current working directory.
DATA_DIR = "data"
def load_cities():
    """
    Finds all cities (all directory names) in ./data

    Only sub-directories of DATA_DIR are reported. Plain files that happen
    to live there (for example ``.DS_Store`` or a README) are skipped,
    because load() treats every returned name as a city directory and
    would fail when trying to open a data file inside it.

    Returns a list of city names, sorted alphabetically so that the result
    does not depend on the order in which the filesystem lists entries.

    Raises FileNotFoundError if DATA_DIR does not exist.
    """
    # scandir yields entries that already know whether they are
    # directories; the `with` block closes the underlying iterator promptly.
    with os.scandir(DATA_DIR) as entries:
        return sorted(entry.name for entry in entries if entry.is_dir())
def load(cities, data_filename):
    """
    Given a list of city names,
    for each city extract all data from ./data/<city>/<data_filename>.json

    Each file is read as JSON Lines: one JSON document per line. Lines that
    contain only whitespace (such as a trailing empty line) are skipped
    instead of being handed to the JSON parser.

    Returns a dictionary of the form:
        {
            <city1>: [<entry1>, <entry2>, ...],
            <city2>: [<entry1>, <entry2>, ...],
            ...
        }

    Raises FileNotFoundError if a city has no <data_filename>.json file,
    and json.JSONDecodeError if a non-blank line is not valid JSON.
    """
    data = {}
    for city in cities:
        path = os.path.join(DATA_DIR, city, f"{data_filename}.json")
        # Decode explicitly as UTF-8 so the result does not depend on the
        # platform's default text encoding.
        with open(path, "r", encoding="utf-8") as f:
            data[city] = [json.loads(line) for line in f if line.strip()]
    return data
def get_business(city, business_id):
    """
    Look up a single business in a city by its id.

    Scans BUSINESSES[city] in order and returns the first entry whose
    "business_id" field equals the given business_id. The entry is returned
    as stored, i.e. the dictionary parsed from the city's business file.

    Raises IndexError if no business in that city has the given id.
    """
    found = next(
        (
            candidate
            for candidate in BUSINESSES[city]
            if candidate["business_id"] == business_id
        ),
        None,
    )
    if found is None:
        raise IndexError(f"invalid business_id {business_id}")
    return found
def get_reviews(city, business_id=None, user_id=None, n=10):
    """
    Given a city name and optionally a business id and/or a user id,
    return up to n randomly chosen reviews for that business/user combo
    in that city.

    A filter is only applied when its argument is truthy; with neither
    filter given, every review of the city is a candidate. If fewer than n
    reviews match, all matching reviews are returned (in random order).

    Returns a list of review dictionaries, each being an entry of
    REVIEWS[city] as stored.
    """
    matching = []
    for candidate in REVIEWS[city]:
        if business_id and candidate["business_id"] != business_id:
            continue
        if user_id and candidate["user_id"] != user_id:
            continue
        matching.append(candidate)
    # Never ask for more items than are available.
    sample_size = min(n, len(matching))
    return random.sample(matching, sample_size)
def get_user(username):
    """
    Get a user by its username.

    Walks through the user lists of every city in USERS, in the
    dictionary's iteration order, and returns the first entry whose "name"
    field equals username. The entry is returned as stored, i.e. the
    dictionary parsed from a city's user file.

    Raises IndexError if no user with that name exists in any city.
    """
    candidates = (
        person
        for city_users in USERS.values()
        for person in city_users
        if person["name"] == username
    )
    found = next(candidates, None)
    if found is None:
        raise IndexError(f"invalid username {username}")
    return found
# Read every dataset into memory once, at import time. Each table is a
# dictionary mapping a city name to the list of entries parsed from that
# city's <dataset>.json file; the lookup functions above read from them.
CITIES = load_cities()
USERS, BUSINESSES, REVIEWS, TIPS, CHECKINS = (
    load(CITIES, dataset)
    for dataset in ("user", "business", "review", "tip", "checkin")
)