# sup_ee_observed.py
#-------------------------------------------------
#FIFTH - look for evidence of exploration-exploitation
# trade off
#-------------------------------------------------
#import modules
import json
import pickle
import scipy.stats.mstats as ssm
import numpy as np
import random
import datetime
import bisect
from pylab import *
from scipy.stats.stats import pearsonr
# ------------------------------------------------
# import data from json
print("loading data")
# The context manager closes the file handle as soon as parsing is done,
# instead of leaving it open for the rest of the script.
with open('data_by_cookie.json') as fh:
    # data: dict keyed by player (cookie); each value is indexed by attempt
    # label further down, and element 0 of an attempt is used as its score
    data = json.load(fh)
# --------------------------------------------
# restrict the analysis to players with at least ten recorded attempts
print("organising data")
big = dict(
    (cookie, plays)
    for cookie, plays in data.items()
    if len(plays) > 9
)
# --------------------------------------------
# best score reached by each player (one entry per key of big);
# element 0 of every attempt record is taken as that attempt's score
maxscore = {}
for player, attempts in big.items():
    maxscore[player] = max(attempts[label][0] for label in attempts)
# all best scores in ascending order
ranked_maxscore = sorted(maxscore.values())
# score found at each integer percentile 0..99 of the best-score distribution
prcentiles = [ssm.scoreatpercentile(ranked_maxscore, pct) for pct in range(100)]
#decile={}
#
#for key in big:
# for i in prcentiles:
# if maxscore[key]>i:
# decile[key]=prcentiles.index(float(i))
# so now we know how good each player is
# next: mean and variance of each player's early and later plays

# zero-padded attempt labels: '00001'..'00005' and '00006'..'00010'
first_plays = ['%.5d' % n for n in range(1, 6)]
second_plays = ['%.5d' % n for n in range(6, 11)]

# construct variables dicts (all keyed by player)
av1, var1 = {}, {}
av2, var2 = {}, {}

print("calculating summary stats")


def _scores_for(plays, labels):
    """Return the scores (element 0) of the labelled attempts found in plays.

    Labels that raise KeyError (attempt not recorded for this player) are
    skipped, so the result may be shorter than ``labels``.
    """
    found = []
    for label in labels:
        try:
            found.append(plays[label][0])
        except KeyError:
            # attempt missing for this player: leave it out
            pass
    return found


# for each player gather plays 1-5 (first) and 6-10 (second)
# and store their mean / variance as av1, var1 and av2, var2
for key in big:
    first = _scores_for(big[key], first_plays)
    av1[key] = mean(first)
    var1[key] = var(first)

    second = _scores_for(big[key], second_plays)
    av2[key] = mean(second)
    var2[key] = var(second)
# flatten the summary stats into two parallel lists (same player order):
# x holds the variance of plays 6-10, y the mean of plays 1-5
x = [var2[key] for key in big]
y = [av1[key] for key in big]
# percentile cut points (0..99) of each statistic across players
prcentiles_x = [ssm.scoreatpercentile(x, pct) for pct in range(100)]
prcentiles_y = [ssm.scoreatpercentile(y, pct) for pct in range(100)]
# for every player, locate their statistic among the cut points:
# bisect gives the insertion index, i.e. how many cut points are <= the value
prcentile_xindex = {}
prcentile_yindex = {}
for key in big:
    prcentile_xindex[key] = bisect.bisect(prcentiles_x, var2[key])
    prcentile_yindex[key] = bisect.bisect(prcentiles_y, av1[key])
print("saving data")
# convert to lists; both walk the keys of prcentile_xindex so that
# xlist[i] and ylist[i] always belong to the same player
xlist = [prcentile_xindex[key] for key in prcentile_xindex]
ylist = [prcentile_yindex[key] for key in prcentile_xindex]
# Write each list to its own pickle file. The 'with' blocks make sure the
# files are flushed and closed even if pickling raises.
with open('save_a5_xlist.p', 'wb') as out:
    pickle.dump(xlist, out)
with open('save_a5_ylist.p', 'wb') as out:
    pickle.dump(ylist, out)