-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathstepcounts.py
60 lines (42 loc) · 1.73 KB
/
stepcounts.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import os
import xml.etree.ElementTree as ET
import pandas as pd
import matplotlib.pyplot as plt
def plot_daily_counts(counts):
    """Draw the daily step totals as a scatter plot and save it as a PNG.

    ``counts`` is indexed with the keys ``"date"`` (x axis) and ``"value"``
    (y axis). The chart is written to ``out/step_counts.png``; this function
    does not create the ``out`` directory itself. The current pyplot figure
    is cleared once the image has been saved.
    """
    dates = counts["date"]
    totals = counts["value"]
    plt.scatter(dates, totals, c="blue")

    # Chart title and axis labels.
    plt.title("Daily Step Counts")
    plt.xlabel("Date")
    plt.ylabel("Steps")

    # Resize the active figure (in inches) and write it to disk.
    figure = plt.gcf()
    figure.set_size_inches(30.5, 15.5)
    figure.savefig("out/step_counts.png", dpi=100)

    # Clear the figure so later pyplot calls start from a blank canvas.
    plt.clf()
def main():
    """Extract phone-recorded step counts from an Apple Health export.

    Reads ``data/export.xml``, keeps the ``Record`` elements whose ``type`` is
    ``HKQuantityTypeIdentifierStepCount`` and whose ``sourceName`` contains
    "Phone", converts the date and value attributes from strings, and sums
    the values per calendar day.

    Writes ``dailycounts.csv`` (per-day totals) and ``stepcounts.csv`` (the
    filtered records) into ``./out``, then calls ``plot_daily_counts`` with
    the per-day totals.
    """
    outdir = "./out"
    # exist_ok=True replaces the exists()/mkdir() pair, which could fail if
    # the directory appeared between the check and the create.
    os.makedirs(outdir, exist_ok=True)

    plt.style.use("fivethirtyeight")

    print("Extracting step counts...\n")
    tree = ET.parse("data/export.xml")
    root = tree.getroot()

    # Step counts are stored under an element like
    # `<Record type="HKQuantityTypeIdentifierStepCount" .../>`
    records_list = [x.attrib for x in root.iter("Record")]
    df = pd.DataFrame(records_list)
    step_counts = df[df["type"] == "HKQuantityTypeIdentifierStepCount"]

    # Only use one device (e.g. Phone vs an Apple watch).
    # na=False treats records without a sourceName as non-matching; without
    # it the mask would contain NaN and boolean indexing would raise.
    from_phone = step_counts["sourceName"].str.contains("Phone", na=False)
    # Explicit copy so the column assignments below act on an independent
    # frame rather than on a slice of `df`.
    step_counts = step_counts[from_phone].copy()

    # Apple Health data is stored in XML tags as strings.
    # Rework strings to dates and numbers.
    for col in ["creationDate", "startDate", "endDate"]:
        step_counts[col] = pd.to_datetime(step_counts[col])

    # Each record is assigned to the calendar day of its endDate. This was
    # previously derived from the loop variable, which ended on "endDate";
    # it is now spelled out so reordering the list above cannot change it.
    step_counts["date"] = step_counts["endDate"].dt.date
    step_counts["value"] = pd.to_numeric(step_counts["value"])

    print("Number of records: " + str(step_counts.shape[0]))

    daily_counts = step_counts.groupby("date", as_index=False)["value"].sum()

    # Output processed data as CSV
    daily_counts.to_csv(os.path.join(outdir, "dailycounts.csv"))
    step_counts.to_csv(os.path.join(outdir, "stepcounts.csv"))

    plot_daily_counts(daily_counts)
# Script entry point: run the extraction only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()